{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "weather = pd.read_csv(r\"C:\\work\\learnbetter\\micro-class\\week 8 SVM (2)\\data\\weatherAUS5000.csv\",index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainTomorrow</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-03-24</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>12.3</td>\n",
       "      <td>19.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "      <td>39.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>47.0</td>\n",
       "      <td>1022.2</td>\n",
       "      <td>1021.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15.1</td>\n",
       "      <td>17.7</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2011-07-12</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>7.9</td>\n",
       "      <td>11.4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>N</td>\n",
       "      <td>20.0</td>\n",
       "      <td>NNE</td>\n",
       "      <td>...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>1028.7</td>\n",
       "      <td>1025.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.4</td>\n",
       "      <td>11.3</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010-02-08</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>24.0</td>\n",
       "      <td>38.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>23.4</td>\n",
       "      <td>13.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>39.0</td>\n",
       "      <td>NNE</td>\n",
       "      <td>...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>1018.0</td>\n",
       "      <td>1016.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32.4</td>\n",
       "      <td>37.4</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2016-09-19</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>6.7</td>\n",
       "      <td>16.4</td>\n",
       "      <td>0.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>31.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>1014.4</td>\n",
       "      <td>1010.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.2</td>\n",
       "      <td>15.9</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-03-05</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>16.7</td>\n",
       "      <td>24.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>11.7</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>24.0</td>\n",
       "      <td>61.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1019.3</td>\n",
       "      <td>1018.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.8</td>\n",
       "      <td>23.7</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date  Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0  2015-03-24  Adelaide     12.3     19.3       0.0          5.0       NaN   \n",
       "1  2011-07-12  Adelaide      7.9     11.4       0.0          1.0       0.5   \n",
       "2  2010-02-08  Adelaide     24.0     38.1       0.0         23.4      13.0   \n",
       "3  2016-09-19  Adelaide      6.7     16.4       0.4          NaN       NaN   \n",
       "4  2014-03-05  Adelaide     16.7     24.8       0.0          6.6      11.7   \n",
       "\n",
       "  WindGustDir  WindGustSpeed WindDir9am      ...      WindSpeed3pm  \\\n",
       "0           S           39.0          S      ...              19.0   \n",
       "1           N           20.0        NNE      ...               7.0   \n",
       "2          SE           39.0        NNE      ...              19.0   \n",
       "3           N           31.0          N      ...              15.0   \n",
       "4           S           37.0          S      ...              24.0   \n",
       "\n",
       "   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n",
       "0         59.0         47.0       1022.2       1021.4       NaN       NaN   \n",
       "1         70.0         59.0       1028.7       1025.7       NaN       NaN   \n",
       "2         36.0         24.0       1018.0       1016.0       NaN       NaN   \n",
       "3         65.0         40.0       1014.4       1010.0       NaN       NaN   \n",
       "4         61.0         48.0       1019.3       1018.9       NaN       NaN   \n",
       "\n",
       "   Temp9am  Temp3pm  RainTomorrow  \n",
       "0     15.1     17.7            No  \n",
       "1      8.4     11.3            No  \n",
       "2     32.4     37.4            No  \n",
       "3     11.2     15.9            No  \n",
       "4     20.8     23.7            No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the loaded table.\n",
    "weather.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#将特征矩阵和标签Y分开\n",
    "X = weather.iloc[:,:-1]\n",
    "Y = weather.iloc[:,-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Shortcut to split a cell: Ctrl Shift -\n",
    "\n",
    "#Shortcut to merge cells: Shift M"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5000, 21)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape #the 5000 rows are a random sample picked by the author"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 5000 entries, 0 to 4999\n",
      "Data columns (total 21 columns):\n",
      "Date             5000 non-null object\n",
      "Location         5000 non-null object\n",
      "MinTemp          4979 non-null float64\n",
      "MaxTemp          4987 non-null float64\n",
      "Rainfall         4950 non-null float64\n",
      "Evaporation      2841 non-null float64\n",
      "Sunshine         2571 non-null float64\n",
      "WindGustDir      4669 non-null object\n",
      "WindGustSpeed    4669 non-null float64\n",
      "WindDir9am       4651 non-null object\n",
      "WindDir3pm       4887 non-null object\n",
      "WindSpeed9am     4949 non-null float64\n",
      "WindSpeed3pm     4919 non-null float64\n",
      "Humidity9am      4936 non-null float64\n",
      "Humidity3pm      4880 non-null float64\n",
      "Pressure9am      4506 non-null float64\n",
      "Pressure3pm      4504 non-null float64\n",
      "Cloud9am         3111 non-null float64\n",
      "Cloud3pm         3012 non-null float64\n",
      "Temp9am          4967 non-null float64\n",
      "Temp3pm          4912 non-null float64\n",
      "dtypes: float64(16), object(5)\n",
      "memory usage: 859.4+ KB\n"
     ]
    }
   ],
   "source": [
    "#Explore the column data types\n",
    "X.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Date             0.0000\n",
       "Location         0.0000\n",
       "MinTemp          0.0042\n",
       "MaxTemp          0.0026\n",
       "Rainfall         0.0100\n",
       "Evaporation      0.4318\n",
       "Sunshine         0.4858\n",
       "WindGustDir      0.0662\n",
       "WindGustSpeed    0.0662\n",
       "WindDir9am       0.0698\n",
       "WindDir3pm       0.0226\n",
       "WindSpeed9am     0.0102\n",
       "WindSpeed3pm     0.0162\n",
       "Humidity9am      0.0128\n",
       "Humidity3pm      0.0240\n",
       "Pressure9am      0.0988\n",
       "Pressure3pm      0.0992\n",
       "Cloud9am         0.3778\n",
       "Cloud3pm         0.3976\n",
       "Temp9am          0.0066\n",
       "Temp3pm          0.0176\n",
       "dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Explore missing values\n",
    "X.isnull().mean() #fraction of missing values per column, i.e. isnull().sum() (the count of True) / X.shape[0]\n",
    "#We will need different imputation strategies for the missing values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Add a new cell above: ESC a enter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Add a new cell below: ESC b enter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Delete a cell: ESC d d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5000,)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check the shape of the label vector.\n",
    "Y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Y.isnull().sum() #when summing booleans, True counts as 1 and False as 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['No', 'Yes'], dtype=object)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Explore the label classes\n",
    "np.unique(Y) #the label is binary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Split into training and test sets (30% of the rows are held out for testing)\n",
    "Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,Y,test_size=0.3,random_state=420) #random sampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed9am</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1809</th>\n",
       "      <td>2015-08-24</td>\n",
       "      <td>Katherine</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4176</th>\n",
       "      <td>2016-12-10</td>\n",
       "      <td>Tuggeranong</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>110</th>\n",
       "      <td>2010-04-18</td>\n",
       "      <td>Albany</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3582</th>\n",
       "      <td>2009-11-26</td>\n",
       "      <td>Sale</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2162</th>\n",
       "      <td>2014-04-25</td>\n",
       "      <td>Mildura</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>NNE</td>\n",
       "      <td>24.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            Date     Location  MinTemp  MaxTemp  Rainfall  Evaporation  \\\n",
       "1809  2015-08-24    Katherine     17.5     36.0       0.0          8.8   \n",
       "4176  2016-12-10  Tuggeranong      9.5     25.0       0.0          NaN   \n",
       "110   2010-04-18       Albany     13.0     22.6       0.0          3.8   \n",
       "3582  2009-11-26         Sale     13.9     29.8       0.0          5.8   \n",
       "2162  2014-04-25      Mildura      6.0     23.5       0.0          2.8   \n",
       "\n",
       "      Sunshine WindGustDir  WindGustSpeed WindDir9am   ...    WindSpeed9am  \\\n",
       "1809       NaN         ESE           26.0        NNW   ...            17.0   \n",
       "4176       NaN         NNW           33.0         NE   ...             7.0   \n",
       "110       10.4         NaN            NaN         NE   ...            17.0   \n",
       "3582       5.1           S           37.0          N   ...            11.0   \n",
       "2162       8.6         NNE           24.0          E   ...            15.0   \n",
       "\n",
       "      WindSpeed3pm  Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  \\\n",
       "1809          15.0         57.0          NaN       1016.8       1012.2   \n",
       "4176          17.0         59.0         31.0       1020.4       1017.5   \n",
       "110           31.0         79.0         68.0       1020.3       1015.7   \n",
       "3582          28.0         82.0         44.0       1012.5       1005.9   \n",
       "2162          15.0         58.0         35.0       1019.8       1014.1   \n",
       "\n",
       "      Cloud9am  Cloud3pm  Temp9am  Temp3pm  \n",
       "1809       0.0       NaN     27.5      NaN  \n",
       "4176       NaN       NaN     14.6     23.6  \n",
       "110        1.0       3.0     17.5     20.8  \n",
       "3582       6.0       6.0     18.5     27.5  \n",
       "2162       2.0       4.0     12.4     22.4  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the training features; the row labels are still those of the original table.\n",
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "#恢复索引\n",
    "for i in [Xtrain, Xtest, Ytrain, Ytest]:\n",
    "    i.index = range(i.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed9am</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-08-24</td>\n",
       "      <td>Katherine</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-12-10</td>\n",
       "      <td>Tuggeranong</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010-04-18</td>\n",
       "      <td>Albany</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2009-11-26</td>\n",
       "      <td>Sale</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-04-25</td>\n",
       "      <td>Mildura</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>NNE</td>\n",
       "      <td>24.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0  2015-08-24    Katherine     17.5     36.0       0.0          8.8       NaN   \n",
       "1  2016-12-10  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n",
       "2  2010-04-18       Albany     13.0     22.6       0.0          3.8      10.4   \n",
       "3  2009-11-26         Sale     13.9     29.8       0.0          5.8       5.1   \n",
       "4  2014-04-25      Mildura      6.0     23.5       0.0          2.8       8.6   \n",
       "\n",
       "  WindGustDir  WindGustSpeed WindDir9am   ...    WindSpeed9am  WindSpeed3pm  \\\n",
       "0         ESE           26.0        NNW   ...            17.0          15.0   \n",
       "1         NNW           33.0         NE   ...             7.0          17.0   \n",
       "2         NaN            NaN         NE   ...            17.0          31.0   \n",
       "3           S           37.0          N   ...            11.0          28.0   \n",
       "4         NNE           24.0          E   ...            15.0          15.0   \n",
       "\n",
       "   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n",
       "0         57.0          NaN       1016.8       1012.2       0.0       NaN   \n",
       "1         59.0         31.0       1020.4       1017.5       NaN       NaN   \n",
       "2         79.0         68.0       1020.3       1015.7       1.0       3.0   \n",
       "3         82.0         44.0       1012.5       1005.9       6.0       6.0   \n",
       "4         58.0         35.0       1019.8       1014.1       2.0       4.0   \n",
       "\n",
       "   Temp9am  Temp3pm  \n",
       "0     27.5      NaN  \n",
       "1     14.6     23.6  \n",
       "2     17.5     20.8  \n",
       "3     18.5     27.5  \n",
       "4     12.4     22.4  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Confirm that the training index now starts at 0.\n",
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     No\n",
       "1     No\n",
       "2     No\n",
       "3    Yes\n",
       "4     No\n",
       "Name: RainTomorrow, dtype: object"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the training labels.\n",
    "Ytrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "No     2704\n",
       "Yes     796\n",
       "Name: RainTomorrow, dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Is there a class-imbalance problem?\n",
    "Ytrain.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "No     1157\n",
       "Yes     343\n",
       "Name: RainTomorrow, dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Class counts in the test split, for comparison with the training split.\n",
    "Ytest.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "#There is a mild class-imbalance problem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.3969849246231156"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Ytrain.value_counts()[0]/Ytrain.value_counts()[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Encode the labels\n",
    "from sklearn.preprocessing import LabelEncoder #label-specific encoder, covered in chapter 3\n",
    "encorder = LabelEncoder().fit(Ytrain) #accepts one-dimensional input\n",
    "#The encoder has now learned the two classes, YES and NO: YES is 1, NO is 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Fit on the training set, then transform the training set and the test set separately\n",
    "#NOTE(review): this cell rebinds Ytrain/Ytest to the encoded values, so it looks unsafe to re-run without re-running the split first\n",
    "Ytrain = pd.DataFrame(encorder.transform(Ytrain))\n",
    "Ytest = pd.DataFrame(encorder.transform(Ytest))\n",
    "\n",
    "#What if the test set contains a label class that never appeared in the training set?\n",
    "#For example, the test set has YES, NO and UNKNOWN\n",
    "#while the training set only has YES and NO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3470</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3471</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3472</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3473</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3474</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3475</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3476</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3477</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3478</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3479</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3480</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3481</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3482</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3483</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3484</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3485</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3486</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3487</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3488</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3489</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3490</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3491</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3492</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3493</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3494</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3495</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3496</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3497</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3498</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3499</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3500 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      0\n",
       "0     0\n",
       "1     0\n",
       "2     0\n",
       "3     1\n",
       "4     0\n",
       "5     0\n",
       "6     0\n",
       "7     0\n",
       "8     0\n",
       "9     1\n",
       "10    0\n",
       "11    0\n",
       "12    0\n",
       "13    0\n",
       "14    0\n",
       "15    0\n",
       "16    0\n",
       "17    0\n",
       "18    0\n",
       "19    0\n",
       "20    1\n",
       "21    1\n",
       "22    0\n",
       "23    0\n",
       "24    0\n",
       "25    1\n",
       "26    0\n",
       "27    0\n",
       "28    1\n",
       "29    0\n",
       "...  ..\n",
       "3470  1\n",
       "3471  0\n",
       "3472  0\n",
       "3473  0\n",
       "3474  0\n",
       "3475  0\n",
       "3476  0\n",
       "3477  0\n",
       "3478  0\n",
       "3479  0\n",
       "3480  0\n",
       "3481  0\n",
       "3482  0\n",
       "3483  0\n",
       "3484  0\n",
       "3485  1\n",
       "3486  0\n",
       "3487  1\n",
       "3488  1\n",
       "3489  0\n",
       "3490  0\n",
       "3491  0\n",
       "3492  0\n",
       "3493  0\n",
       "3494  0\n",
       "3495  0\n",
       "3496  1\n",
       "3497  0\n",
       "3498  0\n",
       "3499  0\n",
       "\n",
       "[3500 rows x 1 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the encoded training labels (a single column of 0/1 codes).\n",
    "Ytrain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0\n",
       "0  0\n",
       "1  0\n",
       "2  1\n",
       "3  0\n",
       "4  0"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five encoded test labels.\n",
    "Ytest.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the encoded training labels to a CSV file.\n",
    "# The file name below is a placeholder string (it reads 'the location where you\n",
    "# want to save this file.file name.csv'); substitute a real destination path.\n",
    "Ytrain.to_csv(path_or_buf=\"你想要保存这个文件的地址.文件名.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>1%</th>\n",
       "      <th>5%</th>\n",
       "      <th>10%</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>90%</th>\n",
       "      <th>99%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>MinTemp</th>\n",
       "      <td>3486.0</td>\n",
       "      <td>12.225645</td>\n",
       "      <td>6.396243</td>\n",
       "      <td>-6.5</td>\n",
       "      <td>-1.715</td>\n",
       "      <td>1.800</td>\n",
       "      <td>4.1</td>\n",
       "      <td>7.7</td>\n",
       "      <td>12.0</td>\n",
       "      <td>16.7</td>\n",
       "      <td>20.9</td>\n",
       "      <td>25.900</td>\n",
       "      <td>29.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MaxTemp</th>\n",
       "      <td>3489.0</td>\n",
       "      <td>23.245543</td>\n",
       "      <td>7.201839</td>\n",
       "      <td>-3.7</td>\n",
       "      <td>8.888</td>\n",
       "      <td>12.840</td>\n",
       "      <td>14.5</td>\n",
       "      <td>18.0</td>\n",
       "      <td>22.5</td>\n",
       "      <td>28.4</td>\n",
       "      <td>33.0</td>\n",
       "      <td>40.400</td>\n",
       "      <td>46.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rainfall</th>\n",
       "      <td>3467.0</td>\n",
       "      <td>2.487049</td>\n",
       "      <td>7.949686</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.8</td>\n",
       "      <td>6.6</td>\n",
       "      <td>41.272</td>\n",
       "      <td>115.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Evaporation</th>\n",
       "      <td>1983.0</td>\n",
       "      <td>5.619163</td>\n",
       "      <td>4.383098</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.400</td>\n",
       "      <td>0.800</td>\n",
       "      <td>1.4</td>\n",
       "      <td>2.6</td>\n",
       "      <td>4.8</td>\n",
       "      <td>7.4</td>\n",
       "      <td>10.2</td>\n",
       "      <td>20.600</td>\n",
       "      <td>56.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sunshine</th>\n",
       "      <td>1790.0</td>\n",
       "      <td>7.508659</td>\n",
       "      <td>3.805841</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.345</td>\n",
       "      <td>1.4</td>\n",
       "      <td>4.6</td>\n",
       "      <td>8.3</td>\n",
       "      <td>10.6</td>\n",
       "      <td>12.0</td>\n",
       "      <td>13.300</td>\n",
       "      <td>13.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <td>3263.0</td>\n",
       "      <td>39.858413</td>\n",
       "      <td>13.219607</td>\n",
       "      <td>9.0</td>\n",
       "      <td>15.000</td>\n",
       "      <td>20.000</td>\n",
       "      <td>24.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>76.000</td>\n",
       "      <td>117.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindSpeed9am</th>\n",
       "      <td>3466.0</td>\n",
       "      <td>14.046163</td>\n",
       "      <td>8.670472</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>37.000</td>\n",
       "      <td>65.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <td>3437.0</td>\n",
       "      <td>18.553390</td>\n",
       "      <td>8.611818</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.000</td>\n",
       "      <td>6.000</td>\n",
       "      <td>7.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>43.000</td>\n",
       "      <td>65.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Humidity9am</th>\n",
       "      <td>3459.0</td>\n",
       "      <td>69.069095</td>\n",
       "      <td>18.787698</td>\n",
       "      <td>2.0</td>\n",
       "      <td>18.000</td>\n",
       "      <td>35.000</td>\n",
       "      <td>45.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>83.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>100.000</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Humidity3pm</th>\n",
       "      <td>3408.0</td>\n",
       "      <td>51.651995</td>\n",
       "      <td>20.697872</td>\n",
       "      <td>2.0</td>\n",
       "      <td>9.000</td>\n",
       "      <td>17.000</td>\n",
       "      <td>23.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>98.000</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pressure9am</th>\n",
       "      <td>3154.0</td>\n",
       "      <td>1017.622067</td>\n",
       "      <td>7.065236</td>\n",
       "      <td>985.1</td>\n",
       "      <td>1000.506</td>\n",
       "      <td>1006.100</td>\n",
       "      <td>1008.9</td>\n",
       "      <td>1012.8</td>\n",
       "      <td>1017.6</td>\n",
       "      <td>1022.3</td>\n",
       "      <td>1027.0</td>\n",
       "      <td>1033.247</td>\n",
       "      <td>1038.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pressure3pm</th>\n",
       "      <td>3154.0</td>\n",
       "      <td>1015.227077</td>\n",
       "      <td>7.032531</td>\n",
       "      <td>980.2</td>\n",
       "      <td>998.000</td>\n",
       "      <td>1004.000</td>\n",
       "      <td>1006.5</td>\n",
       "      <td>1010.3</td>\n",
       "      <td>1015.2</td>\n",
       "      <td>1020.0</td>\n",
       "      <td>1024.4</td>\n",
       "      <td>1030.800</td>\n",
       "      <td>1036.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cloud9am</th>\n",
       "      <td>2171.0</td>\n",
       "      <td>4.491939</td>\n",
       "      <td>2.858781</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.000</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cloud3pm</th>\n",
       "      <td>2095.0</td>\n",
       "      <td>4.603819</td>\n",
       "      <td>2.655765</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.000</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Temp9am</th>\n",
       "      <td>3481.0</td>\n",
       "      <td>16.989859</td>\n",
       "      <td>6.537552</td>\n",
       "      <td>-5.2</td>\n",
       "      <td>2.400</td>\n",
       "      <td>7.000</td>\n",
       "      <td>9.0</td>\n",
       "      <td>12.2</td>\n",
       "      <td>16.6</td>\n",
       "      <td>21.6</td>\n",
       "      <td>26.0</td>\n",
       "      <td>31.000</td>\n",
       "      <td>38.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Temp3pm</th>\n",
       "      <td>3431.0</td>\n",
       "      <td>21.719003</td>\n",
       "      <td>7.031199</td>\n",
       "      <td>-4.1</td>\n",
       "      <td>7.460</td>\n",
       "      <td>11.500</td>\n",
       "      <td>13.3</td>\n",
       "      <td>16.6</td>\n",
       "      <td>21.0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>31.4</td>\n",
       "      <td>38.600</td>\n",
       "      <td>45.9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                count         mean        std    min        1%        5%  \\\n",
       "MinTemp        3486.0    12.225645   6.396243   -6.5    -1.715     1.800   \n",
       "MaxTemp        3489.0    23.245543   7.201839   -3.7     8.888    12.840   \n",
       "Rainfall       3467.0     2.487049   7.949686    0.0     0.000     0.000   \n",
       "Evaporation    1983.0     5.619163   4.383098    0.0     0.400     0.800   \n",
       "Sunshine       1790.0     7.508659   3.805841    0.0     0.000     0.345   \n",
       "WindGustSpeed  3263.0    39.858413  13.219607    9.0    15.000    20.000   \n",
       "WindSpeed9am   3466.0    14.046163   8.670472    0.0     0.000     0.000   \n",
       "WindSpeed3pm   3437.0    18.553390   8.611818    0.0     2.000     6.000   \n",
       "Humidity9am    3459.0    69.069095  18.787698    2.0    18.000    35.000   \n",
       "Humidity3pm    3408.0    51.651995  20.697872    2.0     9.000    17.000   \n",
       "Pressure9am    3154.0  1017.622067   7.065236  985.1  1000.506  1006.100   \n",
       "Pressure3pm    3154.0  1015.227077   7.032531  980.2   998.000  1004.000   \n",
       "Cloud9am       2171.0     4.491939   2.858781    0.0     0.000     0.000   \n",
       "Cloud3pm       2095.0     4.603819   2.655765    0.0     0.000     0.000   \n",
       "Temp9am        3481.0    16.989859   6.537552   -5.2     2.400     7.000   \n",
       "Temp3pm        3431.0    21.719003   7.031199   -4.1     7.460    11.500   \n",
       "\n",
       "                  10%     25%     50%     75%     90%       99%     max  \n",
       "MinTemp           4.1     7.7    12.0    16.7    20.9    25.900    29.0  \n",
       "MaxTemp          14.5    18.0    22.5    28.4    33.0    40.400    46.4  \n",
       "Rainfall          0.0     0.0     0.0     0.8     6.6    41.272   115.8  \n",
       "Evaporation       1.4     2.6     4.8     7.4    10.2    20.600    56.0  \n",
       "Sunshine          1.4     4.6     8.3    10.6    12.0    13.300    13.9  \n",
       "WindGustSpeed    24.0    31.0    39.0    48.0    57.0    76.000   117.0  \n",
       "WindSpeed9am      4.0     7.0    13.0    19.0    26.0    37.000    65.0  \n",
       "WindSpeed3pm      7.0    13.0    19.0    24.0    30.0    43.000    65.0  \n",
       "Humidity9am      45.0    57.0    70.0    83.0    94.0   100.000   100.0  \n",
       "Humidity3pm      23.0    37.0    52.0    66.0    79.0    98.000   100.0  \n",
       "Pressure9am    1008.9  1012.8  1017.6  1022.3  1027.0  1033.247  1038.1  \n",
       "Pressure3pm    1006.5  1010.3  1015.2  1020.0  1024.4  1030.800  1036.0  \n",
       "Cloud9am          1.0     1.0     5.0     7.0     8.0     8.000     8.0  \n",
       "Cloud3pm          1.0     2.0     5.0     7.0     8.0     8.000     8.0  \n",
       "Temp9am           9.0    12.2    16.6    21.6    26.0    31.000    38.0  \n",
       "Temp3pm          13.3    16.6    21.0    26.6    31.4    38.600    45.9  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Descriptive statistics of the training features with extra tail percentiles,\n",
    "# transposed so that each feature occupies one row.\n",
    "Xtrain.describe(percentiles=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]).transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>1%</th>\n",
       "      <th>5%</th>\n",
       "      <th>10%</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>90%</th>\n",
       "      <th>99%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>MinTemp</th>\n",
       "      <td>1493.0</td>\n",
       "      <td>11.916812</td>\n",
       "      <td>6.375377</td>\n",
       "      <td>-8.5</td>\n",
       "      <td>-2.024</td>\n",
       "      <td>1.600</td>\n",
       "      <td>3.70</td>\n",
       "      <td>7.3</td>\n",
       "      <td>11.8</td>\n",
       "      <td>16.5</td>\n",
       "      <td>20.48</td>\n",
       "      <td>25.316</td>\n",
       "      <td>28.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MaxTemp</th>\n",
       "      <td>1498.0</td>\n",
       "      <td>22.906809</td>\n",
       "      <td>6.986043</td>\n",
       "      <td>-0.8</td>\n",
       "      <td>9.134</td>\n",
       "      <td>13.000</td>\n",
       "      <td>14.50</td>\n",
       "      <td>17.8</td>\n",
       "      <td>22.4</td>\n",
       "      <td>27.8</td>\n",
       "      <td>32.60</td>\n",
       "      <td>38.303</td>\n",
       "      <td>45.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rainfall</th>\n",
       "      <td>1483.0</td>\n",
       "      <td>2.241807</td>\n",
       "      <td>7.988822</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.8</td>\n",
       "      <td>5.20</td>\n",
       "      <td>35.372</td>\n",
       "      <td>108.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Evaporation</th>\n",
       "      <td>858.0</td>\n",
       "      <td>5.657809</td>\n",
       "      <td>4.105762</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.400</td>\n",
       "      <td>1.000</td>\n",
       "      <td>1.60</td>\n",
       "      <td>2.8</td>\n",
       "      <td>4.8</td>\n",
       "      <td>7.6</td>\n",
       "      <td>10.40</td>\n",
       "      <td>19.458</td>\n",
       "      <td>38.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sunshine</th>\n",
       "      <td>781.0</td>\n",
       "      <td>7.677465</td>\n",
       "      <td>3.862294</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.300</td>\n",
       "      <td>1.50</td>\n",
       "      <td>4.7</td>\n",
       "      <td>8.6</td>\n",
       "      <td>10.7</td>\n",
       "      <td>12.20</td>\n",
       "      <td>13.400</td>\n",
       "      <td>13.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <td>1406.0</td>\n",
       "      <td>40.044097</td>\n",
       "      <td>14.027052</td>\n",
       "      <td>9.0</td>\n",
       "      <td>15.000</td>\n",
       "      <td>20.000</td>\n",
       "      <td>24.00</td>\n",
       "      <td>30.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>57.00</td>\n",
       "      <td>78.000</td>\n",
       "      <td>122.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindSpeed9am</th>\n",
       "      <td>1483.0</td>\n",
       "      <td>13.986514</td>\n",
       "      <td>9.124337</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>4.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>26.00</td>\n",
       "      <td>39.360</td>\n",
       "      <td>72.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <td>1482.0</td>\n",
       "      <td>18.601215</td>\n",
       "      <td>8.850446</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.000</td>\n",
       "      <td>6.000</td>\n",
       "      <td>7.00</td>\n",
       "      <td>13.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>31.00</td>\n",
       "      <td>43.000</td>\n",
       "      <td>56.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Humidity9am</th>\n",
       "      <td>1477.0</td>\n",
       "      <td>68.688558</td>\n",
       "      <td>18.876448</td>\n",
       "      <td>4.0</td>\n",
       "      <td>20.000</td>\n",
       "      <td>36.000</td>\n",
       "      <td>44.00</td>\n",
       "      <td>57.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>95.00</td>\n",
       "      <td>100.000</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Humidity3pm</th>\n",
       "      <td>1472.0</td>\n",
       "      <td>51.431386</td>\n",
       "      <td>20.459957</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8.710</td>\n",
       "      <td>18.000</td>\n",
       "      <td>23.00</td>\n",
       "      <td>37.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>78.00</td>\n",
       "      <td>96.290</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pressure9am</th>\n",
       "      <td>1352.0</td>\n",
       "      <td>1017.763536</td>\n",
       "      <td>6.910275</td>\n",
       "      <td>988.5</td>\n",
       "      <td>1000.900</td>\n",
       "      <td>1006.255</td>\n",
       "      <td>1008.61</td>\n",
       "      <td>1013.2</td>\n",
       "      <td>1017.8</td>\n",
       "      <td>1022.3</td>\n",
       "      <td>1026.50</td>\n",
       "      <td>1033.449</td>\n",
       "      <td>1038.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pressure3pm</th>\n",
       "      <td>1350.0</td>\n",
       "      <td>1015.397926</td>\n",
       "      <td>6.916976</td>\n",
       "      <td>986.2</td>\n",
       "      <td>999.198</td>\n",
       "      <td>1003.900</td>\n",
       "      <td>1006.49</td>\n",
       "      <td>1010.9</td>\n",
       "      <td>1015.4</td>\n",
       "      <td>1020.0</td>\n",
       "      <td>1024.20</td>\n",
       "      <td>1031.151</td>\n",
       "      <td>1036.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cloud9am</th>\n",
       "      <td>940.0</td>\n",
       "      <td>4.494681</td>\n",
       "      <td>2.870468</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>8.00</td>\n",
       "      <td>8.000</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cloud3pm</th>\n",
       "      <td>917.0</td>\n",
       "      <td>4.403490</td>\n",
       "      <td>2.731969</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>8.00</td>\n",
       "      <td>8.000</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Temp9am</th>\n",
       "      <td>1486.0</td>\n",
       "      <td>16.751817</td>\n",
       "      <td>6.339816</td>\n",
       "      <td>-5.3</td>\n",
       "      <td>2.370</td>\n",
       "      <td>6.725</td>\n",
       "      <td>9.00</td>\n",
       "      <td>12.1</td>\n",
       "      <td>16.5</td>\n",
       "      <td>21.3</td>\n",
       "      <td>25.45</td>\n",
       "      <td>30.200</td>\n",
       "      <td>35.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Temp3pm</th>\n",
       "      <td>1481.0</td>\n",
       "      <td>21.483660</td>\n",
       "      <td>6.770567</td>\n",
       "      <td>-1.2</td>\n",
       "      <td>8.540</td>\n",
       "      <td>11.800</td>\n",
       "      <td>13.30</td>\n",
       "      <td>16.5</td>\n",
       "      <td>20.9</td>\n",
       "      <td>26.2</td>\n",
       "      <td>30.90</td>\n",
       "      <td>37.400</td>\n",
       "      <td>42.9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                count         mean        std    min        1%        5%  \\\n",
       "MinTemp        1493.0    11.916812   6.375377   -8.5    -2.024     1.600   \n",
       "MaxTemp        1498.0    22.906809   6.986043   -0.8     9.134    13.000   \n",
       "Rainfall       1483.0     2.241807   7.988822    0.0     0.000     0.000   \n",
       "Evaporation     858.0     5.657809   4.105762    0.0     0.400     1.000   \n",
       "Sunshine        781.0     7.677465   3.862294    0.0     0.000     0.300   \n",
       "WindGustSpeed  1406.0    40.044097  14.027052    9.0    15.000    20.000   \n",
       "WindSpeed9am   1483.0    13.986514   9.124337    0.0     0.000     0.000   \n",
       "WindSpeed3pm   1482.0    18.601215   8.850446    0.0     2.000     6.000   \n",
       "Humidity9am    1477.0    68.688558  18.876448    4.0    20.000    36.000   \n",
       "Humidity3pm    1472.0    51.431386  20.459957    2.0     8.710    18.000   \n",
       "Pressure9am    1352.0  1017.763536   6.910275  988.5  1000.900  1006.255   \n",
       "Pressure3pm    1350.0  1015.397926   6.916976  986.2   999.198  1003.900   \n",
       "Cloud9am        940.0     4.494681   2.870468    0.0     0.000     0.000   \n",
       "Cloud3pm        917.0     4.403490   2.731969    0.0     0.000     0.000   \n",
       "Temp9am        1486.0    16.751817   6.339816   -5.3     2.370     6.725   \n",
       "Temp3pm        1481.0    21.483660   6.770567   -1.2     8.540    11.800   \n",
       "\n",
       "                   10%     25%     50%     75%      90%       99%     max  \n",
       "MinTemp           3.70     7.3    11.8    16.5    20.48    25.316    28.3  \n",
       "MaxTemp          14.50    17.8    22.4    27.8    32.60    38.303    45.1  \n",
       "Rainfall          0.00     0.0     0.0     0.8     5.20    35.372   108.2  \n",
       "Evaporation       1.60     2.8     4.8     7.6    10.40    19.458    38.8  \n",
       "Sunshine          1.50     4.7     8.6    10.7    12.20    13.400    13.9  \n",
       "WindGustSpeed    24.00    30.0    39.0    48.0    57.00    78.000   122.0  \n",
       "WindSpeed9am      4.00     7.0    13.0    20.0    26.00    39.360    72.0  \n",
       "WindSpeed3pm      7.00    13.0    19.0    24.0    31.00    43.000    56.0  \n",
       "Humidity9am      44.00    57.0    69.0    82.0    95.00   100.000   100.0  \n",
       "Humidity3pm      23.00    37.0    52.0    66.0    78.00    96.290   100.0  \n",
       "Pressure9am    1008.61  1013.2  1017.8  1022.3  1026.50  1033.449  1038.2  \n",
       "Pressure3pm    1006.49  1010.9  1015.4  1020.0  1024.20  1031.151  1036.9  \n",
       "Cloud9am          1.00     1.0     5.0     7.0     8.00     8.000     8.0  \n",
       "Cloud3pm          1.00     2.0     5.0     7.0     8.00     8.000     8.0  \n",
       "Temp9am           9.00    12.1    16.5    21.3    25.45    30.200    35.1  \n",
       "Temp3pm          13.30    16.5    20.9    26.2    30.90    37.400    42.9  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtest.describe([0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.99]).T"
   ]
  },
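  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For those who downloaded the data from Kaggle, and for those who insist on using the full dataset (about 150,000 rows):\n",
    "# If you find an outlier, first look at how often it occurs.\n",
    "# If it occurs only once, it is most likely a data-entry error, so simply delete it.\n",
    "# If it occurs many times, talk to the business/domain staff; erroneous values introduced by people are not worth keeping.\n",
    "# If outliers make up roughly 10% of the whole dataset, replace them with a valid, non-disruptive value (for example 0), or treat them as missing values."
   ]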
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed9am</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-08-24</td>\n",
       "      <td>Katherine</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-12-10</td>\n",
       "      <td>Tuggeranong</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010-04-18</td>\n",
       "      <td>Albany</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2009-11-26</td>\n",
       "      <td>Sale</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-04-25</td>\n",
       "      <td>Mildura</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>NNE</td>\n",
       "      <td>24.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0  2015-08-24    Katherine     17.5     36.0       0.0          8.8       NaN   \n",
       "1  2016-12-10  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n",
       "2  2010-04-18       Albany     13.0     22.6       0.0          3.8      10.4   \n",
       "3  2009-11-26         Sale     13.9     29.8       0.0          5.8       5.1   \n",
       "4  2014-04-25      Mildura      6.0     23.5       0.0          2.8       8.6   \n",
       "\n",
       "  WindGustDir  WindGustSpeed WindDir9am   ...    WindSpeed9am  WindSpeed3pm  \\\n",
       "0         ESE           26.0        NNW   ...            17.0          15.0   \n",
       "1         NNW           33.0         NE   ...             7.0          17.0   \n",
       "2         NaN            NaN         NE   ...            17.0          31.0   \n",
       "3           S           37.0          N   ...            11.0          28.0   \n",
       "4         NNE           24.0          E   ...            15.0          15.0   \n",
       "\n",
       "   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n",
       "0         57.0          NaN       1016.8       1012.2       0.0       NaN   \n",
       "1         59.0         31.0       1020.4       1017.5       NaN       NaN   \n",
       "2         79.0         68.0       1020.3       1015.7       1.0       3.0   \n",
       "3         82.0         44.0       1012.5       1005.9       6.0       6.0   \n",
       "4         58.0         35.0       1019.8       1014.1       2.0       4.0   \n",
       "\n",
       "   Temp9am  Temp3pm  \n",
       "0     27.5      NaN  \n",
       "1     14.6     23.6  \n",
       "2     17.5     20.8  \n",
       "3     18.5     27.5  \n",
       "4     12.4     22.4  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "str"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(Xtrain.iloc[0,0]) #字符串"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "Xtrainc = Xtrain.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed9am</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2796</th>\n",
       "      <td>2015-03-24</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>12.3</td>\n",
       "      <td>19.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "      <td>39.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>13.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>47.0</td>\n",
       "      <td>1022.2</td>\n",
       "      <td>1021.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15.1</td>\n",
       "      <td>17.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2975</th>\n",
       "      <td>2012-08-17</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>7.8</td>\n",
       "      <td>13.2</td>\n",
       "      <td>17.6</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SW</td>\n",
       "      <td>61.0</td>\n",
       "      <td>SW</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>47.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1014.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.3</td>\n",
       "      <td>12.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>775</th>\n",
       "      <td>2013-03-16</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>17.4</td>\n",
       "      <td>23.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.7</td>\n",
       "      <td>SSE</td>\n",
       "      <td>46.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>1019.9</td>\n",
       "      <td>1020.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>19.1</td>\n",
       "      <td>20.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>861</th>\n",
       "      <td>2011-07-12</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>7.9</td>\n",
       "      <td>11.4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>N</td>\n",
       "      <td>20.0</td>\n",
       "      <td>NNE</td>\n",
       "      <td>...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>1028.7</td>\n",
       "      <td>1025.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.4</td>\n",
       "      <td>11.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2906</th>\n",
       "      <td>2015-08-24</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>9.2</td>\n",
       "      <td>14.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SE</td>\n",
       "      <td>48.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>1024.7</td>\n",
       "      <td>1024.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.9</td>\n",
       "      <td>13.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2900</th>\n",
       "      <td>2009-09-17</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>14.2</td>\n",
       "      <td>17.4</td>\n",
       "      <td>8.8</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>SW</td>\n",
       "      <td>41.0</td>\n",
       "      <td>SSW</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>56.0</td>\n",
       "      <td>1014.9</td>\n",
       "      <td>1018.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16.2</td>\n",
       "      <td>16.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>902</th>\n",
       "      <td>2008-12-10</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>14.2</td>\n",
       "      <td>28.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.4</td>\n",
       "      <td>12.5</td>\n",
       "      <td>SE</td>\n",
       "      <td>48.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>13.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>56.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1010.8</td>\n",
       "      <td>1008.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>19.3</td>\n",
       "      <td>25.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>919</th>\n",
       "      <td>2011-10-12</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>7.7</td>\n",
       "      <td>19.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.4</td>\n",
       "      <td>11.4</td>\n",
       "      <td>W</td>\n",
       "      <td>30.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>13.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>56.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>1021.2</td>\n",
       "      <td>1018.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.1</td>\n",
       "      <td>19.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>936</th>\n",
       "      <td>2014-03-11</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>22.3</td>\n",
       "      <td>32.2</td>\n",
       "      <td>0.4</td>\n",
       "      <td>20.6</td>\n",
       "      <td>3.2</td>\n",
       "      <td>W</td>\n",
       "      <td>65.0</td>\n",
       "      <td>ESE</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>1017.0</td>\n",
       "      <td>1017.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>25.6</td>\n",
       "      <td>22.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>942</th>\n",
       "      <td>2015-02-23</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>20.6</td>\n",
       "      <td>26.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>16.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SSE</td>\n",
       "      <td>48.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>61.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>1015.3</td>\n",
       "      <td>1016.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22.2</td>\n",
       "      <td>25.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>2009-11-30</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>13.1</td>\n",
       "      <td>22.4</td>\n",
       "      <td>0.6</td>\n",
       "      <td>13.6</td>\n",
       "      <td>10.4</td>\n",
       "      <td>SSE</td>\n",
       "      <td>37.0</td>\n",
       "      <td>SSE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>1017.9</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16.1</td>\n",
       "      <td>20.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1013</th>\n",
       "      <td>2012-08-23</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>8.1</td>\n",
       "      <td>12.4</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>WNW</td>\n",
       "      <td>74.0</td>\n",
       "      <td>NW</td>\n",
       "      <td>...</td>\n",
       "      <td>26.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>1002.6</td>\n",
       "      <td>1005.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.7</td>\n",
       "      <td>7.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1042</th>\n",
       "      <td>2008-11-03</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>13.3</td>\n",
       "      <td>21.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15.2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>SSE</td>\n",
       "      <td>39.0</td>\n",
       "      <td>SSW</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>1021.9</td>\n",
       "      <td>1020.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.8</td>\n",
       "      <td>19.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>701</th>\n",
       "      <td>2009-01-08</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>14.3</td>\n",
       "      <td>23.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.4</td>\n",
       "      <td>12.7</td>\n",
       "      <td>SE</td>\n",
       "      <td>37.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>1019.9</td>\n",
       "      <td>1019.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.6</td>\n",
       "      <td>23.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>2012-05-02</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>9.4</td>\n",
       "      <td>16.5</td>\n",
       "      <td>12.4</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SSE</td>\n",
       "      <td>39.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>47.0</td>\n",
       "      <td>1029.6</td>\n",
       "      <td>1028.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13.5</td>\n",
       "      <td>14.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227</th>\n",
       "      <td>2016-08-31</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>11.9</td>\n",
       "      <td>16.8</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SW</td>\n",
       "      <td>28.0</td>\n",
       "      <td>WNW</td>\n",
       "      <td>...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>1018.5</td>\n",
       "      <td>1017.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.3</td>\n",
       "      <td>15.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1190</th>\n",
       "      <td>2015-07-01</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>4.7</td>\n",
       "      <td>14.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>WSW</td>\n",
       "      <td>20.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1030.6</td>\n",
       "      <td>1027.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.8</td>\n",
       "      <td>13.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2701</th>\n",
       "      <td>2013-05-16</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>10.7</td>\n",
       "      <td>17.5</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>7.1</td>\n",
       "      <td>SW</td>\n",
       "      <td>35.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>54.0</td>\n",
       "      <td>1016.3</td>\n",
       "      <td>1015.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13.1</td>\n",
       "      <td>16.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1227</th>\n",
       "      <td>2016-06-22</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>10.2</td>\n",
       "      <td>16.5</td>\n",
       "      <td>11.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>WNW</td>\n",
       "      <td>39.0</td>\n",
       "      <td>W</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>1010.3</td>\n",
       "      <td>1010.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13.5</td>\n",
       "      <td>15.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1234</th>\n",
       "      <td>2017-01-09</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>20.2</td>\n",
       "      <td>30.4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SW</td>\n",
       "      <td>24.0</td>\n",
       "      <td>ESE</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1012.7</td>\n",
       "      <td>1011.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.9</td>\n",
       "      <td>28.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1266</th>\n",
       "      <td>2008-11-07</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>18.3</td>\n",
       "      <td>22.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>WNW</td>\n",
       "      <td>56.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>51.0</td>\n",
       "      <td>1001.0</td>\n",
       "      <td>1004.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22.4</td>\n",
       "      <td>18.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1282</th>\n",
       "      <td>2012-09-03</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>7.7</td>\n",
       "      <td>22.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SE</td>\n",
       "      <td>56.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1021.1</td>\n",
       "      <td>1018.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.5</td>\n",
       "      <td>22.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1289</th>\n",
       "      <td>2010-03-09</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>14.3</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1.6</td>\n",
       "      <td>16.4</td>\n",
       "      <td>7.8</td>\n",
       "      <td>SW</td>\n",
       "      <td>39.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>1021.2</td>\n",
       "      <td>1022.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16.8</td>\n",
       "      <td>21.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>2010-11-08</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>13.7</td>\n",
       "      <td>23.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>12.2</td>\n",
       "      <td>W</td>\n",
       "      <td>31.0</td>\n",
       "      <td>WNW</td>\n",
       "      <td>...</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>61.0</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1016.0</td>\n",
       "      <td>1016.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.2</td>\n",
       "      <td>22.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>2012-10-08</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>10.2</td>\n",
       "      <td>23.6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NNE</td>\n",
       "      <td>...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>46.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>1016.7</td>\n",
       "      <td>1012.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15.1</td>\n",
       "      <td>22.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1356</th>\n",
       "      <td>2014-03-05</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>16.7</td>\n",
       "      <td>24.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>11.7</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>61.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1019.3</td>\n",
       "      <td>1018.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.8</td>\n",
       "      <td>23.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1390</th>\n",
       "      <td>2015-10-04</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>15.4</td>\n",
       "      <td>29.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>WNW</td>\n",
       "      <td>20.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1024.0</td>\n",
       "      <td>1022.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18.3</td>\n",
       "      <td>27.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107</th>\n",
       "      <td>2015-10-01</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>7.8</td>\n",
       "      <td>24.4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>WSW</td>\n",
       "      <td>22.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>1028.4</td>\n",
       "      <td>1028.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>19.1</td>\n",
       "      <td>23.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3403</th>\n",
       "      <td>2015-12-05</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>20.7</td>\n",
       "      <td>40.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SE</td>\n",
       "      <td>31.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>1015.3</td>\n",
       "      <td>1012.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>33.4</td>\n",
       "      <td>38.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3023</th>\n",
       "      <td>2012-11-06</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>14.7</td>\n",
       "      <td>22.8</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NW</td>\n",
       "      <td>31.0</td>\n",
       "      <td>W</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>46.0</td>\n",
       "      <td>1011.9</td>\n",
       "      <td>1011.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.7</td>\n",
       "      <td>21.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179</th>\n",
       "      <td>2015-12-20</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>27.1</td>\n",
       "      <td>31.3</td>\n",
       "      <td>0.4</td>\n",
       "      <td>18.0</td>\n",
       "      <td>2.1</td>\n",
       "      <td>SE</td>\n",
       "      <td>56.0</td>\n",
       "      <td>ENE</td>\n",
       "      <td>...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>1007.7</td>\n",
       "      <td>1007.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>30.4</td>\n",
       "      <td>29.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>2009-06-25</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>9.1</td>\n",
       "      <td>18.4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>4.4</td>\n",
       "      <td>N</td>\n",
       "      <td>41.0</td>\n",
       "      <td>NNE</td>\n",
       "      <td>...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>54.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>1012.9</td>\n",
       "      <td>1008.8</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>13.2</td>\n",
       "      <td>14.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1392</th>\n",
       "      <td>2015-11-06</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>13.1</td>\n",
       "      <td>28.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SE</td>\n",
       "      <td>33.0</td>\n",
       "      <td>SSE</td>\n",
       "      <td>...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>1015.9</td>\n",
       "      <td>1013.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.6</td>\n",
       "      <td>26.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1436</th>\n",
       "      <td>2009-10-19</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>12.6</td>\n",
       "      <td>30.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.8</td>\n",
       "      <td>12.3</td>\n",
       "      <td>N</td>\n",
       "      <td>44.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1025.5</td>\n",
       "      <td>1021.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19.9</td>\n",
       "      <td>28.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1431</th>\n",
       "      <td>2012-01-16</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>18.9</td>\n",
       "      <td>38.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>N</td>\n",
       "      <td>41.0</td>\n",
       "      <td>ENE</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1012.0</td>\n",
       "      <td>1008.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>27.6</td>\n",
       "      <td>37.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>2014-02-05</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>23.1</td>\n",
       "      <td>38.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19.8</td>\n",
       "      <td>6.3</td>\n",
       "      <td>SE</td>\n",
       "      <td>37.0</td>\n",
       "      <td>ESE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1012.1</td>\n",
       "      <td>1009.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>26.8</td>\n",
       "      <td>36.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225</th>\n",
       "      <td>2013-08-23</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>6.5</td>\n",
       "      <td>17.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>3.6</td>\n",
       "      <td>SW</td>\n",
       "      <td>43.0</td>\n",
       "      <td>WSW</td>\n",
       "      <td>...</td>\n",
       "      <td>22.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>46.0</td>\n",
       "      <td>1024.2</td>\n",
       "      <td>1022.9</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>13.2</td>\n",
       "      <td>17.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>400</th>\n",
       "      <td>2016-02-09</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>20.2</td>\n",
       "      <td>39.4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17.6</td>\n",
       "      <td>12.8</td>\n",
       "      <td>SSE</td>\n",
       "      <td>43.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>...</td>\n",
       "      <td>24.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1013.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>24.0</td>\n",
       "      <td>37.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1174</th>\n",
       "      <td>2011-07-27</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>4.9</td>\n",
       "      <td>20.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.2</td>\n",
       "      <td>10.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>54.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>1026.8</td>\n",
       "      <td>1024.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.7</td>\n",
       "      <td>20.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3413</th>\n",
       "      <td>2009-03-27</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>15.2</td>\n",
       "      <td>30.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>11.2</td>\n",
       "      <td>SSE</td>\n",
       "      <td>43.0</td>\n",
       "      <td>SSE</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>1024.9</td>\n",
       "      <td>1021.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>16.9</td>\n",
       "      <td>28.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>21.5</td>\n",
       "      <td>40.4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>42.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SE</td>\n",
       "      <td>54.0</td>\n",
       "      <td>ESE</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1014.5</td>\n",
       "      <td>1012.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>28.1</td>\n",
       "      <td>39.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2867</th>\n",
       "      <td>2009-10-07</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>9.0</td>\n",
       "      <td>21.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.8</td>\n",
       "      <td>11.9</td>\n",
       "      <td>SE</td>\n",
       "      <td>35.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>...</td>\n",
       "      <td>22.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>1025.6</td>\n",
       "      <td>1024.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>19.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1741</th>\n",
       "      <td>2010-03-12</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>14.0</td>\n",
       "      <td>30.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>11.4</td>\n",
       "      <td>E</td>\n",
       "      <td>37.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>1028.3</td>\n",
       "      <td>1025.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>17.9</td>\n",
       "      <td>28.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2380</th>\n",
       "      <td>2013-01-15</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>19.0</td>\n",
       "      <td>36.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>ENE</td>\n",
       "      <td>33.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>24.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1014.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>25.6</td>\n",
       "      <td>35.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>975</th>\n",
       "      <td>2015-02-26</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SSE</td>\n",
       "      <td>39.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1011.3</td>\n",
       "      <td>1008.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>23.1</td>\n",
       "      <td>34.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1716</th>\n",
       "      <td>2016-12-19</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>18.9</td>\n",
       "      <td>33.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>WSW</td>\n",
       "      <td>46.0</td>\n",
       "      <td>NNE</td>\n",
       "      <td>...</td>\n",
       "      <td>26.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>1012.1</td>\n",
       "      <td>1009.8</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>27.2</td>\n",
       "      <td>29.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2569</th>\n",
       "      <td>2009-08-06</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>10.7</td>\n",
       "      <td>25.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.6</td>\n",
       "      <td>10.4</td>\n",
       "      <td>NNW</td>\n",
       "      <td>57.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>24.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1017.6</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15.8</td>\n",
       "      <td>24.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>355</th>\n",
       "      <td>2013-06-23</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>7.8</td>\n",
       "      <td>16.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>3.2</td>\n",
       "      <td>7.4</td>\n",
       "      <td>W</td>\n",
       "      <td>37.0</td>\n",
       "      <td>WNW</td>\n",
       "      <td>...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>54.0</td>\n",
       "      <td>1020.9</td>\n",
       "      <td>1019.4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>9.7</td>\n",
       "      <td>15.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>592</th>\n",
       "      <td>2013-04-03</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>15.2</td>\n",
       "      <td>26.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>10.7</td>\n",
       "      <td>SSE</td>\n",
       "      <td>41.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>...</td>\n",
       "      <td>22.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>33.0</td>\n",
       "      <td>1023.3</td>\n",
       "      <td>1021.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>17.4</td>\n",
       "      <td>26.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>343</th>\n",
       "      <td>2009-12-06</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>16.1</td>\n",
       "      <td>33.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.8</td>\n",
       "      <td>13.1</td>\n",
       "      <td>SSW</td>\n",
       "      <td>39.0</td>\n",
       "      <td>ESE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1016.7</td>\n",
       "      <td>1013.4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>22.8</td>\n",
       "      <td>32.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1994</th>\n",
       "      <td>2015-06-06</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>6.6</td>\n",
       "      <td>20.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>9.0</td>\n",
       "      <td>NNE</td>\n",
       "      <td>31.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>1030.1</td>\n",
       "      <td>1026.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.7</td>\n",
       "      <td>19.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2811</th>\n",
       "      <td>2009-03-23</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>13.4</td>\n",
       "      <td>30.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14.6</td>\n",
       "      <td>10.8</td>\n",
       "      <td>SSE</td>\n",
       "      <td>35.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>...</td>\n",
       "      <td>26.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>1015.1</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>16.6</td>\n",
       "      <td>28.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1629</th>\n",
       "      <td>2010-07-17</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>5.1</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>9.5</td>\n",
       "      <td>N</td>\n",
       "      <td>37.0</td>\n",
       "      <td>NNE</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>72.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>1026.4</td>\n",
       "      <td>1022.4</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.4</td>\n",
       "      <td>15.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>320</th>\n",
       "      <td>2014-11-04</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>17.4</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>S</td>\n",
       "      <td>54.0</td>\n",
       "      <td>WNW</td>\n",
       "      <td>...</td>\n",
       "      <td>13.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>1016.2</td>\n",
       "      <td>1016.1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>25.7</td>\n",
       "      <td>28.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1072</th>\n",
       "      <td>2016-12-04</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>24.2</td>\n",
       "      <td>38.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>80.0</td>\n",
       "      <td>SSW</td>\n",
       "      <td>...</td>\n",
       "      <td>13.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>1008.4</td>\n",
       "      <td>1004.5</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>28.3</td>\n",
       "      <td>36.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2223</th>\n",
       "      <td>2009-05-08</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>9.2</td>\n",
       "      <td>20.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>10.4</td>\n",
       "      <td>ESE</td>\n",
       "      <td>37.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>1030.5</td>\n",
       "      <td>1026.9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>13.7</td>\n",
       "      <td>20.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1984</th>\n",
       "      <td>2014-05-26</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>15.5</td>\n",
       "      <td>23.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>43.0</td>\n",
       "      <td>NNE</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>1014.2</td>\n",
       "      <td>1010.3</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>21.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1592</th>\n",
       "      <td>2012-01-10</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>16.8</td>\n",
       "      <td>26.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>5.3</td>\n",
       "      <td>SW</td>\n",
       "      <td>46.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>33.0</td>\n",
       "      <td>1019.1</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.3</td>\n",
       "      <td>24.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2824</th>\n",
       "      <td>2015-11-03</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>16.2</td>\n",
       "      <td>28.5</td>\n",
       "      <td>7.8</td>\n",
       "      <td>4.2</td>\n",
       "      <td>4.5</td>\n",
       "      <td>WSW</td>\n",
       "      <td>80.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>26.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>1009.6</td>\n",
       "      <td>1006.8</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>20.5</td>\n",
       "      <td>26.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1005</th>\n",
       "      <td>2010-05-14</td>\n",
       "      <td>Woomera</td>\n",
       "      <td>3.9</td>\n",
       "      <td>19.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>10.5</td>\n",
       "      <td>NE</td>\n",
       "      <td>33.0</td>\n",
       "      <td>ENE</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>1020.2</td>\n",
       "      <td>1016.4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>11.5</td>\n",
       "      <td>18.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3500 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            Date  Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "2796  2015-03-24  Adelaide     12.3     19.3       0.0          5.0       NaN   \n",
       "2975  2012-08-17  Adelaide      7.8     13.2      17.6          0.8       NaN   \n",
       "775   2013-03-16  Adelaide     17.4     23.8       NaN          NaN       9.7   \n",
       "861   2011-07-12  Adelaide      7.9     11.4       0.0          1.0       0.5   \n",
       "2906  2015-08-24  Adelaide      9.2     14.3       0.0          NaN       NaN   \n",
       "2900  2009-09-17  Adelaide     14.2     17.4       8.8          2.0       7.1   \n",
       "902   2008-12-10  Adelaide     14.2     28.0       0.0          6.4      12.5   \n",
       "919   2011-10-12  Adelaide      7.7     19.9       0.0          3.4      11.4   \n",
       "936   2014-03-11  Adelaide     22.3     32.2       0.4         20.6       3.2   \n",
       "942   2015-02-23  Adelaide     20.6     26.5       0.0         16.2       NaN   \n",
       "999   2009-11-30  Adelaide     13.1     22.4       0.6         13.6      10.4   \n",
       "1013  2012-08-23  Adelaide      8.1     12.4       6.0          1.8       NaN   \n",
       "1042  2008-11-03  Adelaide     13.3     21.2       0.0         15.2      10.0   \n",
       "701   2009-01-08  Adelaide     14.3     23.8       0.0          7.4      12.7   \n",
       "109   2012-05-02  Adelaide      9.4     16.5      12.4          0.8       NaN   \n",
       "227   2016-08-31  Adelaide     11.9     16.8       1.0          NaN       NaN   \n",
       "1190  2015-07-01  Adelaide      4.7     14.5       0.0          NaN       NaN   \n",
       "2701  2013-05-16  Adelaide     10.7     17.5       7.0          1.4       7.1   \n",
       "1227  2016-06-22  Adelaide     10.2     16.5      11.8          NaN       NaN   \n",
       "1234  2017-01-09  Adelaide     20.2     30.4       0.0          NaN       NaN   \n",
       "1266  2008-11-07  Adelaide     18.3     22.5       0.2          8.0       1.4   \n",
       "1282  2012-09-03  Adelaide      7.7     22.9       0.0          7.0       NaN   \n",
       "1289  2010-03-09  Adelaide     14.3     22.0       1.6         16.4       7.8   \n",
       "86    2010-11-08  Adelaide     13.7     23.1       0.0         16.0      12.2   \n",
       "83    2012-10-08  Adelaide     10.2     23.6       0.2         10.0       NaN   \n",
       "1356  2014-03-05  Adelaide     16.7     24.8       0.0          6.6      11.7   \n",
       "1390  2015-10-04  Adelaide     15.4     29.7       0.0          NaN       NaN   \n",
       "107   2015-10-01  Adelaide      7.8     24.4       0.0          NaN       NaN   \n",
       "3403  2015-12-05  Adelaide     20.7     40.2       0.0          NaN       NaN   \n",
       "3023  2012-11-06  Adelaide     14.7     22.8       8.0          3.2       NaN   \n",
       "...          ...       ...      ...      ...       ...          ...       ...   \n",
       "179   2015-12-20   Woomera     27.1     31.3       0.4         18.0       2.1   \n",
       "212   2009-06-25   Woomera      9.1     18.4       0.0          3.8       4.4   \n",
       "1392  2015-11-06   Woomera     13.1     28.3       0.0          NaN       NaN   \n",
       "1436  2009-10-19   Woomera     12.6     30.0       0.0          7.8      12.3   \n",
       "1431  2012-01-16   Woomera     18.9     38.8       0.0         12.0      12.4   \n",
       "204   2014-02-05   Woomera     23.1     38.6       0.0         19.8       6.3   \n",
       "225   2013-08-23   Woomera      6.5     17.7       0.0          3.8       3.6   \n",
       "400   2016-02-09   Woomera     20.2     39.4       0.0         17.6      12.8   \n",
       "1174  2011-07-27   Woomera      4.9     20.9       0.0          3.2      10.0   \n",
       "3413  2009-03-27   Woomera     15.2     30.0       0.0          8.5      11.2   \n",
       "396   2013-01-01   Woomera     21.5     40.4       0.0         42.2       NaN   \n",
       "2867  2009-10-07   Woomera      9.0     21.5       0.0          7.8      11.9   \n",
       "1741  2010-03-12   Woomera     14.0     30.7       0.0         12.0      11.4   \n",
       "2380  2013-01-15   Woomera     19.0     36.3       0.0         13.0      13.0   \n",
       "975   2015-02-26   Woomera     17.5     36.5       0.0         15.0       NaN   \n",
       "1716  2016-12-19   Woomera     18.9     33.0       0.0         10.8       NaN   \n",
       "2569  2009-08-06   Woomera     10.7     25.1       0.0          9.6      10.4   \n",
       "355   2013-06-23   Woomera      7.8     16.0       1.2          3.2       7.4   \n",
       "592   2013-04-03   Woomera     15.2     26.9       0.0          6.8      10.7   \n",
       "343   2009-12-06   Woomera     16.1     33.1       0.0         12.8      13.1   \n",
       "1994  2015-06-06   Woomera      6.6     20.2       0.0          6.6       9.0   \n",
       "2811  2009-03-23   Woomera     13.4     30.7       0.0         14.6      10.8   \n",
       "1629  2010-07-17   Woomera      5.1     17.0       0.0          2.0       9.5   \n",
       "320   2014-11-04   Woomera     17.4     29.8       0.0         12.0      11.0   \n",
       "1072  2016-12-04   Woomera     24.2     38.7       0.0         10.6       NaN   \n",
       "2223  2009-05-08   Woomera      9.2     20.6       0.0          5.2      10.4   \n",
       "1984  2014-05-26   Woomera     15.5     23.6       0.0         24.0       NaN   \n",
       "1592  2012-01-10   Woomera     16.8     26.7       0.0         10.0       5.3   \n",
       "2824  2015-11-03   Woomera     16.2     28.5       7.8          4.2       4.5   \n",
       "1005  2010-05-14   Woomera      3.9     19.3       0.0          5.8      10.5   \n",
       "\n",
       "     WindGustDir  WindGustSpeed WindDir9am   ...    WindSpeed9am  \\\n",
       "2796           S           39.0          S   ...            13.0   \n",
       "2975          SW           61.0         SW   ...            20.0   \n",
       "775          SSE           46.0          S   ...             9.0   \n",
       "861            N           20.0        NNE   ...             7.0   \n",
       "2906          SE           48.0         SE   ...            17.0   \n",
       "2900          SW           41.0        SSW   ...            15.0   \n",
       "902           SE           48.0          E   ...            13.0   \n",
       "919            W           30.0          E   ...            13.0   \n",
       "936            W           65.0        ESE   ...             9.0   \n",
       "942          SSE           48.0          S   ...            20.0   \n",
       "999          SSE           37.0        SSE   ...            17.0   \n",
       "1013         WNW           74.0         NW   ...            26.0   \n",
       "1042         SSE           39.0        SSW   ...            15.0   \n",
       "701           SE           37.0         SE   ...            17.0   \n",
       "109          SSE           39.0          S   ...             9.0   \n",
       "227           SW           28.0        WNW   ...            11.0   \n",
       "1190         WSW           20.0         NE   ...             6.0   \n",
       "2701          SW           35.0         NE   ...             6.0   \n",
       "1227         WNW           39.0          W   ...             9.0   \n",
       "1234          SW           24.0        ESE   ...             6.0   \n",
       "1266         WNW           56.0          N   ...            17.0   \n",
       "1282          SE           56.0          N   ...            13.0   \n",
       "1289          SW           39.0          S   ...            11.0   \n",
       "86             W           31.0        WNW   ...            13.0   \n",
       "83             E           33.0        NNE   ...            11.0   \n",
       "1356           S           37.0          S   ...            15.0   \n",
       "1390         WNW           20.0        NaN   ...             0.0   \n",
       "107          WSW           22.0          N   ...             9.0   \n",
       "3403          SE           31.0         NE   ...             9.0   \n",
       "3023          NW           31.0          W   ...            17.0   \n",
       "...          ...            ...        ...   ...             ...   \n",
       "179           SE           56.0        ENE   ...            11.0   \n",
       "212            N           41.0        NNE   ...            19.0   \n",
       "1392          SE           33.0        SSE   ...            19.0   \n",
       "1436           N           44.0          N   ...            13.0   \n",
       "1431           N           41.0        ENE   ...            20.0   \n",
       "204           SE           37.0        ESE   ...            17.0   \n",
       "225           SW           43.0        WSW   ...            22.0   \n",
       "400          SSE           43.0         SE   ...            24.0   \n",
       "1174         NaN            NaN        NaN   ...             0.0   \n",
       "3413         SSE           43.0        SSE   ...            20.0   \n",
       "396           SE           54.0        ESE   ...            20.0   \n",
       "2867          SE           35.0         SE   ...            22.0   \n",
       "1741           E           37.0         SE   ...            20.0   \n",
       "2380         ENE           33.0          E   ...            24.0   \n",
       "975          SSE           39.0         SE   ...            20.0   \n",
       "1716         WSW           46.0        NNE   ...            26.0   \n",
       "2569         NNW           57.0          N   ...            24.0   \n",
       "355            W           37.0        WNW   ...            19.0   \n",
       "592          SSE           41.0         SE   ...            22.0   \n",
       "343          SSW           39.0        ESE   ...            17.0   \n",
       "1994         NNE           31.0         NE   ...            11.0   \n",
       "2811         SSE           35.0         SE   ...            26.0   \n",
       "1629           N           37.0        NNE   ...            15.0   \n",
       "320            S           54.0        WNW   ...            13.0   \n",
       "1072         NNW           80.0        SSW   ...            13.0   \n",
       "2223         ESE           37.0         SE   ...            19.0   \n",
       "1984         NNW           43.0        NNE   ...             9.0   \n",
       "1592          SW           46.0          S   ...            20.0   \n",
       "2824         WSW           80.0         NE   ...            26.0   \n",
       "1005          NE           33.0        ENE   ...            15.0   \n",
       "\n",
       "      WindSpeed3pm  Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  \\\n",
       "2796          19.0         59.0         47.0       1022.2       1021.4   \n",
       "2975          28.0         76.0         47.0       1012.5       1014.7   \n",
       "775           19.0         63.0         57.0       1019.9       1020.5   \n",
       "861            7.0         70.0         59.0       1028.7       1025.7   \n",
       "2906          19.0         64.0         42.0       1024.7       1024.1   \n",
       "2900          20.0         82.0         56.0       1014.9       1018.0   \n",
       "902           13.0         56.0         35.0       1010.8       1008.9   \n",
       "919           15.0         56.0         43.0       1021.2       1018.1   \n",
       "936            9.0         53.0         78.0       1017.0       1017.2   \n",
       "942           22.0         61.0         43.0       1015.3       1016.5   \n",
       "999           15.0         66.0         43.0       1017.9       1016.8   \n",
       "1013          37.0         74.0         78.0       1002.6       1005.6   \n",
       "1042          20.0         50.0         39.0       1021.9       1020.1   \n",
       "701           22.0         45.0         30.0       1019.9       1019.2   \n",
       "109           15.0         53.0         47.0       1029.6       1028.5   \n",
       "227           13.0         80.0         79.0       1018.5       1017.7   \n",
       "1190          13.0         78.0         48.0       1030.6       1027.6   \n",
       "2701          19.0         88.0         54.0       1016.3       1015.9   \n",
       "1227          17.0         67.0         63.0       1010.3       1010.0   \n",
       "1234           9.0         70.0         38.0       1012.7       1011.3   \n",
       "1266          28.0         58.0         51.0       1001.0       1004.7   \n",
       "1282          17.0         27.0         22.0       1021.1       1018.2   \n",
       "1289          20.0         75.0         36.0       1021.2       1022.5   \n",
       "86            17.0         61.0         38.0       1016.0       1016.3   \n",
       "83            17.0         46.0         28.0       1016.7       1012.9   \n",
       "1356          24.0         61.0         48.0       1019.3       1018.9   \n",
       "1390           9.0         44.0         22.0       1024.0       1022.4   \n",
       "107           15.0         35.0         23.0       1028.4       1028.6   \n",
       "3403          13.0         18.0         13.0       1015.3       1012.1   \n",
       "3023          15.0         64.0         46.0       1011.9       1011.6   \n",
       "...            ...          ...          ...          ...          ...   \n",
       "179           35.0         37.0         41.0       1007.7       1007.7   \n",
       "212           20.0         54.0         90.0       1012.9       1008.8   \n",
       "1392           7.0         65.0         36.0       1015.9       1013.2   \n",
       "1436          22.0         20.0          8.0       1025.5       1021.3   \n",
       "1431          20.0         10.0          5.0       1012.0       1008.7   \n",
       "204            6.0         18.0         11.0       1012.1       1009.9   \n",
       "225           26.0         76.0         46.0       1024.2       1022.9   \n",
       "400            9.0         50.0         12.0       1016.8       1013.3   \n",
       "1174          11.0         54.0         29.0       1026.8       1024.0   \n",
       "3413          17.0         52.0         20.0       1024.9       1021.6   \n",
       "396           26.0         17.0          3.0       1014.5       1012.1   \n",
       "2867          15.0         52.0         20.0       1025.6       1024.3   \n",
       "1741          13.0         42.0         26.0       1028.3       1025.0   \n",
       "2380          15.0         17.0          6.0       1014.2          NaN   \n",
       "975           20.0         37.0          6.0       1011.3       1008.5   \n",
       "1716          24.0         20.0         32.0       1012.1       1009.8   \n",
       "2569          37.0         32.0         12.0       1017.6       1012.5   \n",
       "355           26.0         97.0         54.0       1020.9       1019.4   \n",
       "592           13.0         82.0         33.0       1023.3       1021.0   \n",
       "343           17.0         42.0         12.0       1016.7       1013.4   \n",
       "1994          17.0         65.0         39.0       1030.1       1026.3   \n",
       "2811           9.0         48.0         16.0       1015.1       1012.2   \n",
       "1629          15.0         72.0         43.0       1026.4       1022.4   \n",
       "320           30.0          5.0         24.0       1016.2       1016.1   \n",
       "1072          24.0         43.0         23.0       1008.4       1004.5   \n",
       "2223          19.0         64.0         34.0       1030.5       1026.9   \n",
       "1984          26.0         49.0         37.0       1014.2       1010.3   \n",
       "1592          22.0         52.0         33.0       1019.1       1016.8   \n",
       "2824          50.0         76.0         53.0       1009.6       1006.8   \n",
       "1005          13.0         43.0         19.0       1020.2       1016.4   \n",
       "\n",
       "      Cloud9am  Cloud3pm  Temp9am  Temp3pm  \n",
       "2796       NaN       NaN     15.1     17.7  \n",
       "2975       NaN       NaN      8.3     12.5  \n",
       "775        NaN       NaN     19.1     20.7  \n",
       "861        NaN       NaN      8.4     11.3  \n",
       "2906       NaN       NaN      9.9     13.4  \n",
       "2900       NaN       NaN     16.2     16.7  \n",
       "902        NaN       NaN     19.3     25.6  \n",
       "919        NaN       NaN     14.1     19.4  \n",
       "936        NaN       NaN     25.6     22.8  \n",
       "942        NaN       NaN     22.2     25.7  \n",
       "999        NaN       NaN     16.1     20.9  \n",
       "1013       NaN       NaN     10.7      7.4  \n",
       "1042       NaN       NaN     14.8     19.9  \n",
       "701        NaN       NaN     17.6     23.2  \n",
       "109        NaN       NaN     13.5     14.6  \n",
       "227        NaN       NaN     14.3     15.2  \n",
       "1190       NaN       NaN      8.8     13.1  \n",
       "2701       NaN       NaN     13.1     16.9  \n",
       "1227       NaN       NaN     13.5     15.5  \n",
       "1234       NaN       NaN     20.9     28.9  \n",
       "1266       NaN       NaN     22.4     18.4  \n",
       "1282       NaN       NaN     17.5     22.1  \n",
       "1289       NaN       NaN     16.8     21.6  \n",
       "86         NaN       NaN     17.2     22.5  \n",
       "83         NaN       NaN     15.1     22.6  \n",
       "1356       NaN       NaN     20.8     23.7  \n",
       "1390       NaN       NaN     18.3     27.4  \n",
       "107        NaN       NaN     19.1     23.8  \n",
       "3403       NaN       NaN     33.4     38.6  \n",
       "3023       NaN       NaN     17.7     21.0  \n",
       "...        ...       ...      ...      ...  \n",
       "179        1.0       NaN     30.4     29.7  \n",
       "212        6.0       8.0     13.2     14.2  \n",
       "1392       NaN       NaN     17.6     26.0  \n",
       "1436       0.0       0.0     19.9     28.7  \n",
       "1431       0.0       0.0     27.6     37.9  \n",
       "204        3.0       2.0     26.8     36.2  \n",
       "225        1.0       7.0     13.2     17.3  \n",
       "400        NaN       NaN     24.0     37.7  \n",
       "1174       2.0       0.0     11.7     20.3  \n",
       "3413       0.0       0.0     16.9     28.3  \n",
       "396        0.0       0.0     28.1     39.6  \n",
       "2867       0.0       0.0     12.4     19.9  \n",
       "1741       1.0       1.0     17.9     28.9  \n",
       "2380       2.0       1.0     25.6     35.0  \n",
       "975        1.0       0.0     23.1     34.7  \n",
       "1716       5.0       7.0     27.2     29.7  \n",
       "2569       0.0       0.0     15.8     24.5  \n",
       "355        5.0       6.0      9.7     15.6  \n",
       "592        1.0       4.0     17.4     26.2  \n",
       "343        4.0       6.0     22.8     32.1  \n",
       "1994       0.0       0.0     12.7     19.4  \n",
       "2811       1.0       1.0     16.6     28.6  \n",
       "1629       3.0       4.0      9.4     15.8  \n",
       "320        0.0       0.0     25.7     28.1  \n",
       "1072       7.0       3.0     28.3     36.4  \n",
       "2223       0.0       1.0     13.7     20.1  \n",
       "1984       7.0       7.0     18.0     21.5  \n",
       "1592       4.0       6.0     18.3     24.9  \n",
       "2824       6.0       7.0     20.5     26.2  \n",
       "1005       1.0       1.0     11.5     18.5  \n",
       "\n",
       "[3500 rows x 21 columns]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrainc.sort_values(by=\"Location\")"
   ]
  },
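  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Is the Date feature we have a continuous feature or a categorical one?\n",
    "# 2019-1-6 is a valid value,\n",
    "# but 2019-1-6.5 is not: nothing lies between two consecutive days.\n",
    "# So a date is a categorical variable that splits one year into 365 classes.\n",
    "# Next question: do dates repeat within our Date feature?"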
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2015-07-03    6\n",
       "2015-10-12    6\n",
       "2014-05-16    6\n",
       "2012-09-18    5\n",
       "2011-11-04    5\n",
       "2009-03-30    5\n",
       "2010-05-18    5\n",
       "2017-01-09    5\n",
       "2016-09-07    5\n",
       "2014-06-16    5\n",
       "2012-11-23    5\n",
       "2014-02-12    5\n",
       "2014-07-26    5\n",
       "2016-11-01    5\n",
       "2013-12-20    5\n",
       "2010-11-03    5\n",
       "2011-09-04    5\n",
       "2011-07-19    5\n",
       "2014-03-12    5\n",
       "2009-07-17    5\n",
       "2009-06-29    5\n",
       "2012-07-18    5\n",
       "2009-08-30    4\n",
       "2016-02-11    4\n",
       "2013-06-30    4\n",
       "2009-03-27    4\n",
       "2010-11-13    4\n",
       "2014-11-16    4\n",
       "2015-03-16    4\n",
       "2014-07-25    4\n",
       "             ..\n",
       "2012-07-31    1\n",
       "2010-02-03    1\n",
       "2010-09-11    1\n",
       "2008-10-30    1\n",
       "2014-10-08    1\n",
       "2009-07-30    1\n",
       "2012-09-07    1\n",
       "2016-08-29    1\n",
       "2012-10-28    1\n",
       "2009-08-16    1\n",
       "2014-03-18    1\n",
       "2014-05-03    1\n",
       "2008-09-16    1\n",
       "2013-07-22    1\n",
       "2016-09-17    1\n",
       "2013-01-28    1\n",
       "2012-08-04    1\n",
       "2017-06-09    1\n",
       "2017-04-03    1\n",
       "2016-09-11    1\n",
       "2015-12-13    1\n",
       "2014-09-24    1\n",
       "2010-05-06    1\n",
       "2014-05-25    1\n",
       "2009-02-10    1\n",
       "2014-08-11    1\n",
       "2011-09-13    1\n",
       "2017-05-16    1\n",
       "2012-01-05    1\n",
       "2015-06-29    1\n",
       "Name: Date, Length: 2141, dtype: int64"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.iloc[:,0].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows that share a date are data from different locations over a similar period of time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed9am</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-08-24</td>\n",
       "      <td>Katherine</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2906</th>\n",
       "      <td>2015-08-24</td>\n",
       "      <td>Adelaide</td>\n",
       "      <td>9.2</td>\n",
       "      <td>14.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SE</td>\n",
       "      <td>48.0</td>\n",
       "      <td>SE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>1024.7</td>\n",
       "      <td>1024.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.9</td>\n",
       "      <td>13.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            Date   Location  MinTemp  MaxTemp  Rainfall  Evaporation  \\\n",
       "0     2015-08-24  Katherine     17.5     36.0       0.0          8.8   \n",
       "2906  2015-08-24   Adelaide      9.2     14.3       0.0          NaN   \n",
       "\n",
       "      Sunshine WindGustDir  WindGustSpeed WindDir9am   ...    WindSpeed9am  \\\n",
       "0          NaN         ESE           26.0        NNW   ...            17.0   \n",
       "2906       NaN          SE           48.0         SE   ...            17.0   \n",
       "\n",
       "      WindSpeed3pm  Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  \\\n",
       "0             15.0         57.0          NaN       1016.8       1012.2   \n",
       "2906          19.0         64.0         42.0       1024.7       1024.1   \n",
       "\n",
       "      Cloud9am  Cloud3pm  Temp9am  Temp3pm  \n",
       "0          0.0       NaN     27.5      NaN  \n",
       "2906       NaN       NaN      9.9     13.4  \n",
       "\n",
       "[2 rows x 21 columns]"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.loc[Xtrain.iloc[:,0] == \"2015-08-24\",:]"
   ]
  },
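  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First, dates are not unique: the same date occurs more than once.\n",
    "# Second, after splitting into training and test sets the dates are no longer consecutive but scattered.\n",
    "# Does a particular day of a particular year tend to be rainy, or tend to be dry?\n",
    "# It is not the date that decides whether it rains; factors such as that day's sunshine hours, humidity and temperature matter far more.\n",
    "# Looking at the date alone, it does not seem to have any direct influence on our prediction.\n",
    "# If we treat it as a continuous variable, the algorithm will regard it as a series of numbers from about 1 to 3000 and will not realize that these are dates."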
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2141"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.iloc[:,0].value_counts().count()\n",
    "#如果我们把它当作分类型变量处理，类别太多，有2141类，如果换成数值型，会被直接当成连续型变量，如果做成哑变量，我们特征的维度会爆炸"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      0.0\n",
       "1      0.0\n",
       "2      0.0\n",
       "3      0.0\n",
       "4      0.0\n",
       "5      0.0\n",
       "6      0.0\n",
       "7      0.2\n",
       "8      0.0\n",
       "9      0.2\n",
       "10     1.0\n",
       "11     0.0\n",
       "12     0.2\n",
       "13     0.0\n",
       "14     0.0\n",
       "15     3.0\n",
       "16     0.2\n",
       "17     0.0\n",
       "18    35.2\n",
       "19     0.0\n",
       "Name: Rainfall, dtype: float64"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain[\"Rainfall\"].head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "33"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain[\"Rainfall\"].isnull().sum()\n",
    "#假设你没有下雨\n",
    "#复制你的空值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Python\\lib\\site-packages\\pandas\\core\\indexing.py:362: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[key] = _infer_fill_value(value)\n",
      "C:\\Python\\lib\\site-packages\\pandas\\core\\indexing.py:543: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[item] = s\n"
     ]
    }
   ],
   "source": [
    "Xtrain.loc[Xtrain[\"Rainfall\"] >= 1,\"RainToday\"] = \"Yes\"\n",
    "Xtrain.loc[Xtrain[\"Rainfall\"] < 1,\"RainToday\"] = \"No\"\n",
    "Xtrain.loc[Xtrain[\"Rainfall\"] == np.nan,\"RainToday\"] = np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-08-24</td>\n",
       "      <td>Katherine</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-12-10</td>\n",
       "      <td>Tuggeranong</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010-04-18</td>\n",
       "      <td>Albany</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2009-11-26</td>\n",
       "      <td>Sale</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-04-25</td>\n",
       "      <td>Mildura</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>NNE</td>\n",
       "      <td>24.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0  2015-08-24    Katherine     17.5     36.0       0.0          8.8       NaN   \n",
       "1  2016-12-10  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n",
       "2  2010-04-18       Albany     13.0     22.6       0.0          3.8      10.4   \n",
       "3  2009-11-26         Sale     13.9     29.8       0.0          5.8       5.1   \n",
       "4  2014-04-25      Mildura      6.0     23.5       0.0          2.8       8.6   \n",
       "\n",
       "  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n",
       "0         ESE           26.0        NNW    ...             15.0         57.0   \n",
       "1         NNW           33.0         NE    ...             17.0         59.0   \n",
       "2         NaN            NaN         NE    ...             31.0         79.0   \n",
       "3           S           37.0          N    ...             28.0         82.0   \n",
       "4         NNE           24.0          E    ...             15.0         58.0   \n",
       "\n",
       "   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n",
       "0          NaN       1016.8       1012.2       0.0       NaN     27.5   \n",
       "1         31.0       1020.4       1017.5       NaN       NaN     14.6   \n",
       "2         68.0       1020.3       1015.7       1.0       3.0     17.5   \n",
       "3         44.0       1012.5       1005.9       6.0       6.0     18.5   \n",
       "4         35.0       1019.8       1014.1       2.0       4.0     12.4   \n",
       "\n",
       "   Temp3pm  RainToday  \n",
       "0      NaN         No  \n",
       "1     23.6         No  \n",
       "2     20.8         No  \n",
       "3     27.5         No  \n",
       "4     22.4         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "No     2642\n",
       "Yes     825\n",
       "Name: RainToday, dtype: int64"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.loc[:,\"RainToday\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "Xtest.loc[Xtest[\"Rainfall\"] >= 1,\"RainToday\"] = \"Yes\"\n",
    "Xtest.loc[Xtest[\"Rainfall\"] < 1,\"RainToday\"] = \"No\"\n",
    "Xtest.loc[Xtest[\"Rainfall\"] == np.nan,\"RainToday\"] = np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-08-24</td>\n",
       "      <td>Katherine</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-12-10</td>\n",
       "      <td>Tuggeranong</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010-04-18</td>\n",
       "      <td>Albany</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2009-11-26</td>\n",
       "      <td>Sale</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-04-25</td>\n",
       "      <td>Mildura</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>NNE</td>\n",
       "      <td>24.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0  2015-08-24    Katherine     17.5     36.0       0.0          8.8       NaN   \n",
       "1  2016-12-10  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n",
       "2  2010-04-18       Albany     13.0     22.6       0.0          3.8      10.4   \n",
       "3  2009-11-26         Sale     13.9     29.8       0.0          5.8       5.1   \n",
       "4  2014-04-25      Mildura      6.0     23.5       0.0          2.8       8.6   \n",
       "\n",
       "  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n",
       "0         ESE           26.0        NNW    ...             15.0         57.0   \n",
       "1         NNW           33.0         NE    ...             17.0         59.0   \n",
       "2         NaN            NaN         NE    ...             31.0         79.0   \n",
       "3           S           37.0          N    ...             28.0         82.0   \n",
       "4         NNE           24.0          E    ...             15.0         58.0   \n",
       "\n",
       "   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n",
       "0          NaN       1016.8       1012.2       0.0       NaN     27.5   \n",
       "1         31.0       1020.4       1017.5       NaN       NaN     14.6   \n",
       "2         68.0       1020.3       1015.7       1.0       3.0     17.5   \n",
       "3         44.0       1012.5       1005.9       6.0       6.0     18.5   \n",
       "4         35.0       1019.8       1014.1       2.0       4.0     12.4   \n",
       "\n",
       "   Temp3pm  RainToday  \n",
       "0      NaN         No  \n",
       "1     23.6         No  \n",
       "2     20.8         No  \n",
       "3     27.5         No  \n",
       "4     22.4         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2016-01-23</td>\n",
       "      <td>NorahHead</td>\n",
       "      <td>22.0</td>\n",
       "      <td>27.8</td>\n",
       "      <td>25.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SSW</td>\n",
       "      <td>57.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>37.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>1006.6</td>\n",
       "      <td>1008.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>26.2</td>\n",
       "      <td>23.1</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2009-03-05</td>\n",
       "      <td>MountGambier</td>\n",
       "      <td>12.0</td>\n",
       "      <td>18.6</td>\n",
       "      <td>2.2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7.8</td>\n",
       "      <td>SW</td>\n",
       "      <td>52.0</td>\n",
       "      <td>SW</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>1020.2</td>\n",
       "      <td>1019.9</td>\n",
       "      <td>8.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>14.8</td>\n",
       "      <td>17.5</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010-03-05</td>\n",
       "      <td>MountGinini</td>\n",
       "      <td>9.1</td>\n",
       "      <td>13.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>41.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2013-10-26</td>\n",
       "      <td>Wollongong</td>\n",
       "      <td>13.1</td>\n",
       "      <td>20.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>W</td>\n",
       "      <td>...</td>\n",
       "      <td>24.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>51.0</td>\n",
       "      <td>1021.3</td>\n",
       "      <td>1019.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16.8</td>\n",
       "      <td>19.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2016-11-28</td>\n",
       "      <td>Sale</td>\n",
       "      <td>12.2</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E</td>\n",
       "      <td>33.0</td>\n",
       "      <td>SW</td>\n",
       "      <td>...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>92.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>1015.6</td>\n",
       "      <td>1013.2</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>13.6</td>\n",
       "      <td>19.0</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date      Location  MinTemp  MaxTemp  Rainfall  Evaporation  \\\n",
       "0  2016-01-23     NorahHead     22.0     27.8      25.2          NaN   \n",
       "1  2009-03-05  MountGambier     12.0     18.6       2.2          3.0   \n",
       "2  2010-03-05   MountGinini      9.1     13.3       NaN          NaN   \n",
       "3  2013-10-26    Wollongong     13.1     20.3       0.0          NaN   \n",
       "4  2016-11-28          Sale     12.2     20.0       0.4          NaN   \n",
       "\n",
       "   Sunshine WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  \\\n",
       "0       NaN         SSW           57.0          S    ...             37.0   \n",
       "1       7.8          SW           52.0         SW    ...             28.0   \n",
       "2       NaN          NE           41.0        NaN    ...              NaN   \n",
       "3       NaN          SW           33.0          W    ...             24.0   \n",
       "4       NaN           E           33.0         SW    ...             19.0   \n",
       "\n",
       "   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n",
       "0         91.0         86.0       1006.6       1008.1       NaN       NaN   \n",
       "1         88.0         62.0       1020.2       1019.9       8.0       7.0   \n",
       "2          NaN          NaN          NaN          NaN       NaN       NaN   \n",
       "3         40.0         51.0       1021.3       1019.5       NaN       NaN   \n",
       "4         92.0         69.0       1015.6       1013.2       8.0       4.0   \n",
       "\n",
       "   Temp9am  Temp3pm  RainToday  \n",
       "0     26.2     23.1        Yes  \n",
       "1     14.8     17.5        Yes  \n",
       "2      NaN      NaN        NaN  \n",
       "3     16.8     19.6         No  \n",
       "4     13.6     19.0         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtest.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "int(Xtrain.loc[0,\"Date\"].split(\"-\")[1]) #提取出月份"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Python\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    }
   ],
   "source": [
    "Xtrain[\"Date\"] = Xtrain[\"Date\"].apply(lambda x:int(x.split(\"-\")[1]))\n",
    "#apply是对dataframe上的某一列进行处理的一个函数\n",
    "#lambda x匿名函数，请在dataframe上这一列中的每一行帮我执行冒号后的命令"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3     334\n",
       "5     324\n",
       "7     316\n",
       "9     302\n",
       "6     302\n",
       "1     300\n",
       "11    299\n",
       "10    282\n",
       "4     265\n",
       "2     264\n",
       "12    259\n",
       "8     253\n",
       "Name: Date, dtype: int64"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.loc[:,\"Date\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "#替换完毕后，我们需要修改列的名称\n",
    "#rename是比较少用的，可以用来修改单个列名的函数\n",
    "#我们通常都直接使用 df.columns = 某个列表 这样的形式来一次修改所有的列名\n",
    "#但rename允许我们只修改某个单独的列\n",
    "Xtrain = Xtrain.rename(columns={\"Date\":\"Month\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "      <td>Katherine</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12</td>\n",
       "      <td>Tuggeranong</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>Albany</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>11</td>\n",
       "      <td>Sale</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Mildura</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>NNE</td>\n",
       "      <td>24.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0      8    Katherine     17.5     36.0       0.0          8.8       NaN   \n",
       "1     12  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n",
       "2      4       Albany     13.0     22.6       0.0          3.8      10.4   \n",
       "3     11         Sale     13.9     29.8       0.0          5.8       5.1   \n",
       "4      4      Mildura      6.0     23.5       0.0          2.8       8.6   \n",
       "\n",
       "  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n",
       "0         ESE           26.0        NNW    ...             15.0         57.0   \n",
       "1         NNW           33.0         NE    ...             17.0         59.0   \n",
       "2         NaN            NaN         NE    ...             31.0         79.0   \n",
       "3           S           37.0          N    ...             28.0         82.0   \n",
       "4         NNE           24.0          E    ...             15.0         58.0   \n",
       "\n",
       "   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n",
       "0          NaN       1016.8       1012.2       0.0       NaN     27.5   \n",
       "1         31.0       1020.4       1017.5       NaN       NaN     14.6   \n",
       "2         68.0       1020.3       1015.7       1.0       3.0     17.5   \n",
       "3         44.0       1012.5       1005.9       6.0       6.0     18.5   \n",
       "4         35.0       1019.8       1014.1       2.0       4.0     12.4   \n",
       "\n",
       "   Temp3pm  RainToday  \n",
       "0      NaN         No  \n",
       "1     23.6         No  \n",
       "2     20.8         No  \n",
       "3     27.5         No  \n",
       "4     22.4         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Python\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    }
   ],
   "source": [
    "Xtest[\"Date\"] = Xtest[\"Date\"].apply(lambda x:int(x.split(\"-\")[1]))\n",
    "Xtest = Xtest.rename(columns={\"Date\":\"Month\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>NorahHead</td>\n",
       "      <td>22.0</td>\n",
       "      <td>27.8</td>\n",
       "      <td>25.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SSW</td>\n",
       "      <td>57.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>37.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>1006.6</td>\n",
       "      <td>1008.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>26.2</td>\n",
       "      <td>23.1</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>MountGambier</td>\n",
       "      <td>12.0</td>\n",
       "      <td>18.6</td>\n",
       "      <td>2.2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7.8</td>\n",
       "      <td>SW</td>\n",
       "      <td>52.0</td>\n",
       "      <td>SW</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>1020.2</td>\n",
       "      <td>1019.9</td>\n",
       "      <td>8.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>14.8</td>\n",
       "      <td>17.5</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>MountGinini</td>\n",
       "      <td>9.1</td>\n",
       "      <td>13.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>41.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10</td>\n",
       "      <td>Wollongong</td>\n",
       "      <td>13.1</td>\n",
       "      <td>20.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>W</td>\n",
       "      <td>...</td>\n",
       "      <td>24.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>51.0</td>\n",
       "      <td>1021.3</td>\n",
       "      <td>1019.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16.8</td>\n",
       "      <td>19.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11</td>\n",
       "      <td>Sale</td>\n",
       "      <td>12.2</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E</td>\n",
       "      <td>33.0</td>\n",
       "      <td>SW</td>\n",
       "      <td>...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>92.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>1015.6</td>\n",
       "      <td>1013.2</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>13.6</td>\n",
       "      <td>19.0</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month      Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0      1     NorahHead     22.0     27.8      25.2          NaN       NaN   \n",
       "1      3  MountGambier     12.0     18.6       2.2          3.0       7.8   \n",
       "2      3   MountGinini      9.1     13.3       NaN          NaN       NaN   \n",
       "3     10    Wollongong     13.1     20.3       0.0          NaN       NaN   \n",
       "4     11          Sale     12.2     20.0       0.4          NaN       NaN   \n",
       "\n",
       "  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n",
       "0         SSW           57.0          S    ...             37.0         91.0   \n",
       "1          SW           52.0         SW    ...             28.0         88.0   \n",
       "2          NE           41.0        NaN    ...              NaN          NaN   \n",
       "3          SW           33.0          W    ...             24.0         40.0   \n",
       "4           E           33.0         SW    ...             19.0         92.0   \n",
       "\n",
       "   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n",
       "0         86.0       1006.6       1008.1       NaN       NaN     26.2   \n",
       "1         62.0       1020.2       1019.9       8.0       7.0     14.8   \n",
       "2          NaN          NaN          NaN       NaN       NaN      NaN   \n",
       "3         51.0       1021.3       1019.5       NaN       NaN     16.8   \n",
       "4         69.0       1015.6       1013.2       8.0       4.0     13.6   \n",
       "\n",
       "   Temp3pm  RainToday  \n",
       "0     23.1        Yes  \n",
       "1     17.5        Yes  \n",
       "2      NaN        NaN  \n",
       "3     19.6         No  \n",
       "4     19.0         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtest.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "49"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of distinct locations. Categorical variables with more than 25 categories\n",
    "# end up being treated by the algorithm as continuous variables.\n",
    "Xtrain[\"Location\"].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "cityll = pd.read_csv(r\"C:\\work\\learnbetter\\micro-class\\week 8 SVM (2)\\cityll.csv\",index_col=0)\n",
    "city_climate = pd.read_csv(r\"C:\\work\\learnbetter\\micro-class\\week 8 SVM (2)\\Cityclimate.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "      <th>Latitudedir</th>\n",
       "      <th>Longitudedir</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Adelaide</td>\n",
       "      <td>34.9285°</td>\n",
       "      <td>138.6007°</td>\n",
       "      <td>S,</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albany</td>\n",
       "      <td>35.0275°</td>\n",
       "      <td>117.8840°</td>\n",
       "      <td>S,</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Albury</td>\n",
       "      <td>36.0737°</td>\n",
       "      <td>146.9135°</td>\n",
       "      <td>S,</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Wodonga</td>\n",
       "      <td>36.1241°</td>\n",
       "      <td>146.8818°</td>\n",
       "      <td>S,</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AliceSprings</td>\n",
       "      <td>23.6980°</td>\n",
       "      <td>133.8807°</td>\n",
       "      <td>S,</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           City  Latitude  Longitude Latitudedir Longitudedir\n",
       "0      Adelaide  34.9285°  138.6007°          S,            E\n",
       "1        Albany  35.0275°  117.8840°          S,            E\n",
       "2        Albury  36.0737°  146.9135°          S,            E\n",
       "3       Wodonga  36.1241°  146.8818°          S,            E\n",
       "4  AliceSprings  23.6980°  133.8807°          S,            E"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cityll.head() # Latitude and longitude of each city; these are the cities shown on the map produced by the Australian Bureau of Statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "34.9285"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check on the first row: drop the trailing character and parse the rest as a float\n",
    "float(cityll.at[0,\"Latitude\"][:-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S,    100\n",
       "Name: Latitudedir, dtype: int64"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tally the distinct latitude-direction labels\n",
    "cityll[\"Latitudedir\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Climate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Adelaide</td>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albany</td>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Albury</td>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Wodonga</td>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AliceSprings</td>\n",
       "      <td>Hot dry summer, warm winter</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           City                      Climate\n",
       "0      Adelaide            Warm temperate   \n",
       "1        Albany            Mild temperate   \n",
       "2        Albury  Hot dry summer, cool winter\n",
       "3       Wodonga  Hot dry summer, cool winter\n",
       "4  AliceSprings  Hot dry summer, warm winter"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_climate.head() # Climate of each city, as compiled by the Australian Bureau of Statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Strip the trailing degree sign and parse the remainder as a float\n",
    "for coord in (\"Latitude\", \"Longitude\"):\n",
    "    cityll[coord + \"num\"] = cityll[coord].map(lambda text: float(text[:-1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every coordinate direction is identical (all south latitude, east longitude), because\n",
    "# Australia lies in the southern and eastern hemispheres, so the direction columns are dropped.\n",
    "# Columns are picked by name, and .copy() gives citylld its own data so that adding columns\n",
    "# to it later does not raise SettingWithCopyWarning.\n",
    "citylld = cityll[[\"City\", \"Latitudenum\", \"Longitudenum\"]].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Latitudenum</th>\n",
       "      <th>Longitudenum</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Adelaide</td>\n",
       "      <td>34.9285</td>\n",
       "      <td>138.6007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albany</td>\n",
       "      <td>35.0275</td>\n",
       "      <td>117.8840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Albury</td>\n",
       "      <td>36.0737</td>\n",
       "      <td>146.9135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Wodonga</td>\n",
       "      <td>36.1241</td>\n",
       "      <td>146.8818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AliceSprings</td>\n",
       "      <td>23.6980</td>\n",
       "      <td>133.8807</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Amata</td>\n",
       "      <td>26.1509</td>\n",
       "      <td>131.1467</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Ballarat</td>\n",
       "      <td>37.5622</td>\n",
       "      <td>143.8503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Bathurst</td>\n",
       "      <td>33.4193</td>\n",
       "      <td>149.5775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Birdsville</td>\n",
       "      <td>25.8989</td>\n",
       "      <td>139.3517</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Borroloola</td>\n",
       "      <td>16.0703</td>\n",
       "      <td>136.3072</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Bourke</td>\n",
       "      <td>30.0907</td>\n",
       "      <td>145.9382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Brisbane</td>\n",
       "      <td>27.4698</td>\n",
       "      <td>153.0251</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>BrokenHill</td>\n",
       "      <td>31.9539</td>\n",
       "      <td>141.4539</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Broome</td>\n",
       "      <td>17.9614</td>\n",
       "      <td>122.2359</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Bunbury</td>\n",
       "      <td>33.3256</td>\n",
       "      <td>115.6396</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Burketown</td>\n",
       "      <td>17.8522</td>\n",
       "      <td>139.6332</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Burra</td>\n",
       "      <td>33.6800</td>\n",
       "      <td>138.9363</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Cairns</td>\n",
       "      <td>16.9186</td>\n",
       "      <td>145.7781</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Canberra</td>\n",
       "      <td>35.2809</td>\n",
       "      <td>149.1300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Carnarvon</td>\n",
       "      <td>24.8840</td>\n",
       "      <td>113.6610</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Ceduna</td>\n",
       "      <td>32.1306</td>\n",
       "      <td>133.6817</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Charleville</td>\n",
       "      <td>26.4021</td>\n",
       "      <td>146.2454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>CooberPedy</td>\n",
       "      <td>29.0139</td>\n",
       "      <td>134.7533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Cooktown</td>\n",
       "      <td>15.4758</td>\n",
       "      <td>145.2471</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>CoffsHarbour</td>\n",
       "      <td>30.2986</td>\n",
       "      <td>153.1094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>Dampier</td>\n",
       "      <td>20.6582</td>\n",
       "      <td>116.7151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>Darwin</td>\n",
       "      <td>12.4634</td>\n",
       "      <td>130.8456</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Derby</td>\n",
       "      <td>17.3179</td>\n",
       "      <td>123.6490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>Devonport</td>\n",
       "      <td>41.1771</td>\n",
       "      <td>146.3452</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Dubbo</td>\n",
       "      <td>32.2315</td>\n",
       "      <td>148.6330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>Perth</td>\n",
       "      <td>31.9505</td>\n",
       "      <td>115.8605</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>PerthAirport</td>\n",
       "      <td>31.9440</td>\n",
       "      <td>115.9680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>PortHedland</td>\n",
       "      <td>20.3107</td>\n",
       "      <td>118.5878</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>PortLincoln</td>\n",
       "      <td>34.7240</td>\n",
       "      <td>135.8611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74</th>\n",
       "      <td>PortMacquarie</td>\n",
       "      <td>31.4333</td>\n",
       "      <td>152.9000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>Renmark</td>\n",
       "      <td>34.1743</td>\n",
       "      <td>140.7443</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>76</th>\n",
       "      <td>Rockhampton</td>\n",
       "      <td>23.3791</td>\n",
       "      <td>150.5100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77</th>\n",
       "      <td>Shepparton</td>\n",
       "      <td>36.3833</td>\n",
       "      <td>145.4000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>78</th>\n",
       "      <td>Southport</td>\n",
       "      <td>27.9738</td>\n",
       "      <td>153.4183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79</th>\n",
       "      <td>Strahan</td>\n",
       "      <td>42.1500</td>\n",
       "      <td>145.3167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80</th>\n",
       "      <td>Swansea</td>\n",
       "      <td>33.0850</td>\n",
       "      <td>151.6350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>81</th>\n",
       "      <td>Sydney</td>\n",
       "      <td>33.8688</td>\n",
       "      <td>151.2093</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82</th>\n",
       "      <td>SydneyAirport</td>\n",
       "      <td>33.9399</td>\n",
       "      <td>151.1753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>Tamworth</td>\n",
       "      <td>31.0927</td>\n",
       "      <td>150.9320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>Taroom</td>\n",
       "      <td>25.6406</td>\n",
       "      <td>149.7983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85</th>\n",
       "      <td>Telfer</td>\n",
       "      <td>21.6924</td>\n",
       "      <td>122.1478</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>TennantCreek</td>\n",
       "      <td>19.6484</td>\n",
       "      <td>134.1900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>87</th>\n",
       "      <td>Thargomindah</td>\n",
       "      <td>27.9944</td>\n",
       "      <td>143.8229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>Tibooburra</td>\n",
       "      <td>29.4331</td>\n",
       "      <td>142.0108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89</th>\n",
       "      <td>TimberCreek</td>\n",
       "      <td>15.6432</td>\n",
       "      <td>130.4666</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90</th>\n",
       "      <td>Townsville</td>\n",
       "      <td>19.2590</td>\n",
       "      <td>146.8169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>Warburton</td>\n",
       "      <td>26.1353</td>\n",
       "      <td>126.5783</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>92</th>\n",
       "      <td>Weipa</td>\n",
       "      <td>12.6493</td>\n",
       "      <td>141.8470</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>Whyalla</td>\n",
       "      <td>33.0380</td>\n",
       "      <td>137.5753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94</th>\n",
       "      <td>Wiluna</td>\n",
       "      <td>26.5950</td>\n",
       "      <td>120.2250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>Wollongong</td>\n",
       "      <td>34.4278</td>\n",
       "      <td>150.8931</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>Wyndham</td>\n",
       "      <td>15.4825</td>\n",
       "      <td>128.1228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>Yalgoo</td>\n",
       "      <td>28.3445</td>\n",
       "      <td>116.6851</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>Yulara</td>\n",
       "      <td>25.2335</td>\n",
       "      <td>130.9849</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>Uluru</td>\n",
       "      <td>25.3444</td>\n",
       "      <td>131.0369</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             City  Latitudenum  Longitudenum\n",
       "0        Adelaide      34.9285      138.6007\n",
       "1          Albany      35.0275      117.8840\n",
       "2          Albury      36.0737      146.9135\n",
       "3         Wodonga      36.1241      146.8818\n",
       "4    AliceSprings      23.6980      133.8807\n",
       "5           Amata      26.1509      131.1467\n",
       "6        Ballarat      37.5622      143.8503\n",
       "7        Bathurst      33.4193      149.5775\n",
       "8      Birdsville      25.8989      139.3517\n",
       "9      Borroloola      16.0703      136.3072\n",
       "10         Bourke      30.0907      145.9382\n",
       "11       Brisbane      27.4698      153.0251\n",
       "12     BrokenHill      31.9539      141.4539\n",
       "13         Broome      17.9614      122.2359\n",
       "14        Bunbury      33.3256      115.6396\n",
       "15      Burketown      17.8522      139.6332\n",
       "16          Burra      33.6800      138.9363\n",
       "17         Cairns      16.9186      145.7781\n",
       "18       Canberra      35.2809      149.1300\n",
       "19      Carnarvon      24.8840      113.6610\n",
       "20         Ceduna      32.1306      133.6817\n",
       "21    Charleville      26.4021      146.2454\n",
       "22     CooberPedy      29.0139      134.7533\n",
       "23       Cooktown      15.4758      145.2471\n",
       "24   CoffsHarbour      30.2986      153.1094\n",
       "25        Dampier      20.6582      116.7151\n",
       "26         Darwin      12.4634      130.8456\n",
       "27          Derby      17.3179      123.6490\n",
       "28      Devonport      41.1771      146.3452\n",
       "29          Dubbo      32.2315      148.6330\n",
       "..            ...          ...           ...\n",
       "70          Perth      31.9505      115.8605\n",
       "71   PerthAirport      31.9440      115.9680\n",
       "72    PortHedland      20.3107      118.5878\n",
       "73    PortLincoln      34.7240      135.8611\n",
       "74  PortMacquarie      31.4333      152.9000\n",
       "75        Renmark      34.1743      140.7443\n",
       "76    Rockhampton      23.3791      150.5100\n",
       "77     Shepparton      36.3833      145.4000\n",
       "78      Southport      27.9738      153.4183\n",
       "79        Strahan      42.1500      145.3167\n",
       "80        Swansea      33.0850      151.6350\n",
       "81         Sydney      33.8688      151.2093\n",
       "82  SydneyAirport      33.9399      151.1753\n",
       "83       Tamworth      31.0927      150.9320\n",
       "84         Taroom      25.6406      149.7983\n",
       "85         Telfer      21.6924      122.1478\n",
       "86   TennantCreek      19.6484      134.1900\n",
       "87   Thargomindah      27.9944      143.8229\n",
       "88     Tibooburra      29.4331      142.0108\n",
       "89    TimberCreek      15.6432      130.4666\n",
       "90     Townsville      19.2590      146.8169\n",
       "91      Warburton      26.1353      126.5783\n",
       "92          Weipa      12.6493      141.8470\n",
       "93        Whyalla      33.0380      137.5753\n",
       "94         Wiluna      26.5950      120.2250\n",
       "95     Wollongong      34.4278      150.8931\n",
       "96        Wyndham      15.4825      128.1228\n",
       "97         Yalgoo      28.3445      116.6851\n",
       "98         Yulara      25.2335      130.9849\n",
       "99          Uluru      25.3444      131.0369\n",
       "\n",
       "[100 rows x 3 columns]"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "citylld"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Python\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "# Attach the climate label (last column of city_climate) to citylld; rows are matched on the index.\n",
    "# NOTE(review): this assumes both tables list the cities in the same order - confirm.\n",
    "# assign() builds a new DataFrame instead of writing into the slice taken from cityll,\n",
    "# which is what triggered SettingWithCopyWarning.\n",
    "citylld = citylld.assign(climate=city_climate.iloc[:,-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Latitudenum</th>\n",
       "      <th>Longitudenum</th>\n",
       "      <th>climate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Adelaide</td>\n",
       "      <td>34.9285</td>\n",
       "      <td>138.6007</td>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albany</td>\n",
       "      <td>35.0275</td>\n",
       "      <td>117.8840</td>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Albury</td>\n",
       "      <td>36.0737</td>\n",
       "      <td>146.9135</td>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Wodonga</td>\n",
       "      <td>36.1241</td>\n",
       "      <td>146.8818</td>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AliceSprings</td>\n",
       "      <td>23.6980</td>\n",
       "      <td>133.8807</td>\n",
       "      <td>Hot dry summer, warm winter</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           City  Latitudenum  Longitudenum                      climate\n",
       "0      Adelaide      34.9285      138.6007            Warm temperate   \n",
       "1        Albany      35.0275      117.8840            Mild temperate   \n",
       "2        Albury      36.0737      146.9135  Hot dry summer, cool winter\n",
       "3       Wodonga      36.1241      146.8818  Hot dry summer, cool winter\n",
       "4  AliceSprings      23.6980      133.8807  Hot dry summer, warm winter"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "citylld.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Hot dry summer, cool winter          24\n",
       "Warm temperate                       18\n",
       "Hot dry summer, warm winter          18\n",
       "High humidity summer, warm winter    17\n",
       "Cool temperate                        9\n",
       "Mild temperate                        9\n",
       "Warm humid summer, mild winter        5\n",
       "Name: climate, dtype: int64"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How many cities fall into each climate category\n",
    "citylld[\"climate\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "samplecity = pd.read_csv(r\"C:\\work\\learnbetter\\micro-class\\week 8 SVM (2)\\samplecity.csv\",index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "      <th>Latitudedir</th>\n",
       "      <th>Longitudedir</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Canberra</td>\n",
       "      <td>35.2809°</td>\n",
       "      <td>149.1300°</td>\n",
       "      <td>S,</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sydney</td>\n",
       "      <td>33.8688°</td>\n",
       "      <td>151.2093°</td>\n",
       "      <td>S,</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Perth</td>\n",
       "      <td>31.9505°</td>\n",
       "      <td>115.8605°</td>\n",
       "      <td>S,</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Darwin</td>\n",
       "      <td>12.4634°</td>\n",
       "      <td>130.8456°</td>\n",
       "      <td>S,</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Hobart</td>\n",
       "      <td>42.8821°</td>\n",
       "      <td>147.3272°</td>\n",
       "      <td>S,</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       City  Latitude  Longitude Latitudedir Longitudedir\n",
       "0  Canberra  35.2809°  149.1300°          S,            E\n",
       "1    Sydney  33.8688°  151.2093°          S,            E\n",
       "2     Perth  31.9505°  115.8605°          S,            E\n",
       "3    Darwin  12.4634°  130.8456°          S,            E\n",
       "4    Hobart  42.8821°  147.3272°          S,            E"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "samplecity.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same clean-up for samplecity: strip the degree sign from the coordinates and drop the\n",
    "# direction columns. .copy() makes samplecityd independent of samplecity so that columns\n",
    "# can be added to it later without SettingWithCopyWarning.\n",
    "samplecity[\"Latitudenum\"] = samplecity[\"Latitude\"].apply(lambda x:float(x[:-1]))\n",
    "samplecity[\"Longitudenum\"] = samplecity[\"Longitude\"].apply(lambda x:float(x[:-1]))\n",
    "samplecityd = samplecity[[\"City\", \"Latitudenum\", \"Longitudenum\"]].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Latitudenum</th>\n",
       "      <th>Longitudenum</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Canberra</td>\n",
       "      <td>35.2809</td>\n",
       "      <td>149.1300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sydney</td>\n",
       "      <td>33.8688</td>\n",
       "      <td>151.2093</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Perth</td>\n",
       "      <td>31.9505</td>\n",
       "      <td>115.8605</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Darwin</td>\n",
       "      <td>12.4634</td>\n",
       "      <td>130.8456</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Hobart</td>\n",
       "      <td>42.8821</td>\n",
       "      <td>147.3272</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       City  Latitudenum  Longitudenum\n",
       "0  Canberra      35.2809      149.1300\n",
       "1    Sydney      33.8688      151.2093\n",
       "2     Perth      31.9505      115.8605\n",
       "3    Darwin      12.4634      130.8456\n",
       "4    Hobart      42.8821      147.3272"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "samplecityd.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the coordinates from degrees to radians before computing distances.\n",
    "from math import radians, sin, cos, acos\n",
    "# np.radians converts a whole column at once; .assign returns a new frame, so\n",
    "# nothing is written through .loc into a frame that may be a slice of another one.\n",
    "# Columns 1 and 2 of citylld are taken by position, exactly as before -- presumably\n",
    "# latitude and longitude in degrees; verify against the cell that builds citylld.\n",
    "citylld = citylld.assign(slat=np.radians(citylld.iloc[:, 1].astype(float)),\n",
    "                         slon=np.radians(citylld.iloc[:, 2].astype(float)))\n",
    "samplecityd = samplecityd.assign(elat=np.radians(samplecityd[\"Latitudenum\"]),\n",
    "                                 elon=np.radians(samplecityd[\"Longitudenum\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Python\\lib\\site-packages\\ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in arccos\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "\n",
    "# Sphere radius used to turn the central angle into a distance (Earth, in km).\n",
    "EARTH_RADIUS_KM = 6371.01\n",
    "\n",
    "# The reference cities' radian coordinates are the same for every station,\n",
    "# so read them once instead of on every iteration.\n",
    "slat = citylld[\"slat\"]\n",
    "slon = citylld[\"slon\"]\n",
    "\n",
    "nearest_cities = []\n",
    "nearest_climates = []\n",
    "for elat, elon in zip(samplecityd[\"elat\"], samplecityd[\"elon\"]):\n",
    "    # Spherical law of cosines: cosine of the central angle between this station\n",
    "    # and every reference city.\n",
    "    cos_angle = (np.sin(slat) * np.sin(elat)\n",
    "                 + np.cos(slat) * np.cos(elat) * np.cos(slon - elon))\n",
    "    # Floating-point rounding can push the cosine marginally outside [-1, 1] (for\n",
    "    # example when both points coincide); arccos then returns NaN and emits\n",
    "    # \"invalid value encountered in arccos\". Clipping keeps such pairs at distance 0.\n",
    "    dist = EARTH_RADIUS_KM * np.arccos(cos_angle.clip(-1.0, 1.0))\n",
    "    # idxmin returns the index *label* of the nearest city, which is what .loc\n",
    "    # expects, so the lookup no longer relies on citylld having a 0..n-1 index.\n",
    "    city_index = dist.idxmin()\n",
    "    nearest_cities.append(citylld.loc[city_index, \"City\"])\n",
    "    nearest_climates.append(citylld.loc[city_index, \"climate\"])\n",
    "\n",
    "# Attach the nearest city and that city's climate to every station in one step\n",
    "# rather than growing the frame cell by cell inside the loop.\n",
    "samplecityd = samplecityd.assign(closest_city=nearest_cities, climate=nearest_climates)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Latitudenum</th>\n",
       "      <th>Longitudenum</th>\n",
       "      <th>elat</th>\n",
       "      <th>elon</th>\n",
       "      <th>closest_city</th>\n",
       "      <th>climate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Canberra</td>\n",
       "      <td>35.2809</td>\n",
       "      <td>149.1300</td>\n",
       "      <td>0.615768</td>\n",
       "      <td>2.602810</td>\n",
       "      <td>Canberra</td>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sydney</td>\n",
       "      <td>33.8688</td>\n",
       "      <td>151.2093</td>\n",
       "      <td>0.591122</td>\n",
       "      <td>2.639100</td>\n",
       "      <td>Sydney</td>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Perth</td>\n",
       "      <td>31.9505</td>\n",
       "      <td>115.8605</td>\n",
       "      <td>0.557641</td>\n",
       "      <td>2.022147</td>\n",
       "      <td>Perth</td>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Darwin</td>\n",
       "      <td>12.4634</td>\n",
       "      <td>130.8456</td>\n",
       "      <td>0.217527</td>\n",
       "      <td>2.283687</td>\n",
       "      <td>Darwin</td>\n",
       "      <td>High humidity summer, warm winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Hobart</td>\n",
       "      <td>42.8821</td>\n",
       "      <td>147.3272</td>\n",
       "      <td>0.748434</td>\n",
       "      <td>2.571345</td>\n",
       "      <td>Hobart</td>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       City  Latitudenum  Longitudenum      elat      elon closest_city  \\\n",
       "0  Canberra      35.2809      149.1300  0.615768  2.602810     Canberra   \n",
       "1    Sydney      33.8688      151.2093  0.591122  2.639100       Sydney   \n",
       "2     Perth      31.9505      115.8605  0.557641  2.022147        Perth   \n",
       "3    Darwin      12.4634      130.8456  0.217527  2.283687       Darwin   \n",
       "4    Hobart      42.8821      147.3272  0.748434  2.571345       Hobart   \n",
       "\n",
       "                             climate  \n",
       "0                  Cool temperate     \n",
       "1                  Warm temperate     \n",
       "2                  Warm temperate     \n",
       "3  High humidity summer, warm winter  \n",
       "4                  Cool temperate     "
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the final result; check that the city matching looks broadly correct.\n",
    "samplecityd.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Warm temperate                       15\n",
       "Mild temperate                       10\n",
       "Cool temperate                        9\n",
       "Hot dry summer, cool winter           6\n",
       "High humidity summer, warm winter     4\n",
       "Hot dry summer, warm winter           3\n",
       "Warm humid summer, mild winter        2\n",
       "Name: climate, dtype: int64"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look at how the matched climates are distributed across the stations.\n",
    "samplecityd[\"climate\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Once the matches have been checked, keep only each station and its climate.\n",
    "# The columns are selected by name rather than by positions 0 and -1, so the cell\n",
    "# stays correct if further columns are appended to samplecityd; .copy() detaches\n",
    "# the result so later changes to it do not refer back to samplecityd.\n",
    "locafinal = samplecityd[[\"City\", \"climate\"]].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>climate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Canberra</td>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sydney</td>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Perth</td>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Darwin</td>\n",
       "      <td>High humidity summer, warm winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Hobart</td>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       City                            climate\n",
       "0  Canberra                  Cool temperate   \n",
       "1    Sydney                  Warm temperate   \n",
       "2     Perth                  Warm temperate   \n",
       "3    Darwin  High humidity summer, warm winter\n",
       "4    Hobart                  Cool temperate   "
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "locafinal.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rename City -> Location and climate -> Climate.\n",
    "# rename() leaves columns it does not find untouched, so re-running this cell\n",
    "# (even after Location has become the index) is a no-op instead of an error.\n",
    "locafinal = locafinal.rename(columns={\"City\": \"Location\", \"climate\": \"Climate\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the station name (Location) as the index of locafinal so that the table\n",
    "# can be used as a lookup when mapping later on.\n",
    "locafinal = locafinal.set_index(\"Location\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Climate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Location</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Canberra</th>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sydney</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Perth</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Darwin</th>\n",
       "      <td>High humidity summer, warm winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hobart</th>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Brisbane</th>\n",
       "      <td>Warm humid summer, mild winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Adelaide</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bendigo</th>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Townsville</th>\n",
       "      <td>High humidity summer, warm winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AliceSprings</th>\n",
       "      <td>Hot dry summer, warm winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MountGambier</th>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Launceston</th>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ballarat</th>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Albany</th>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Albury</th>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PerthAirport</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MelbourneAirport</th>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mildura</th>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SydneyAirport</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Nuriootpa</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sale</th>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Watsonia</th>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tuggeranong</th>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Portland</th>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Woomera</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cairns</th>\n",
       "      <td>High humidity summer, warm winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cobar</th>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wollongong</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GoldCoast</th>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WaggaWagga</th>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NorfolkIsland</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Penrith</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SalmonGums</th>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Newcastle</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CoffsHarbour</th>\n",
       "      <td>Warm humid summer, mild winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Witchcliffe</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Richmond</th>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dartmoor</th>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NorahHead</th>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BadgerysCreek</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MountGinini</th>\n",
       "      <td>Cool temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Moree</th>\n",
       "      <td>Hot dry summer, warm winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Walpole</th>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PearceRAAF</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Williamtown</th>\n",
       "      <td>Warm temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Melbourne</th>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Nhil</th>\n",
       "      <td>Mild temperate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Katherine</th>\n",
       "      <td>High humidity summer, warm winter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Uluru</th>\n",
       "      <td>Hot dry summer, warm winter</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            Climate\n",
       "Location                                           \n",
       "Canberra                          Cool temperate   \n",
       "Sydney                            Warm temperate   \n",
       "Perth                             Warm temperate   \n",
       "Darwin            High humidity summer, warm winter\n",
       "Hobart                            Cool temperate   \n",
       "Brisbane             Warm humid summer, mild winter\n",
       "Adelaide                          Warm temperate   \n",
       "Bendigo                           Cool temperate   \n",
       "Townsville        High humidity summer, warm winter\n",
       "AliceSprings            Hot dry summer, warm winter\n",
       "MountGambier                      Mild temperate   \n",
       "Launceston                        Cool temperate   \n",
       "Ballarat                          Cool temperate   \n",
       "Albany                            Mild temperate   \n",
       "Albury                  Hot dry summer, cool winter\n",
       "PerthAirport                      Warm temperate   \n",
       "MelbourneAirport                  Mild temperate   \n",
       "Mildura                 Hot dry summer, cool winter\n",
       "SydneyAirport                     Warm temperate   \n",
       "Nuriootpa                         Warm temperate   \n",
       "Sale                              Mild temperate   \n",
       "Watsonia                Hot dry summer, cool winter\n",
       "Tuggeranong                       Cool temperate   \n",
       "Portland                          Mild temperate   \n",
       "Woomera                           Warm temperate   \n",
       "Cairns            High humidity summer, warm winter\n",
       "Cobar                   Hot dry summer, cool winter\n",
       "Wollongong                        Warm temperate   \n",
       "GoldCoast                         Cool temperate   \n",
       "WaggaWagga              Hot dry summer, cool winter\n",
       "NorfolkIsland                     Warm temperate   \n",
       "Penrith                           Warm temperate   \n",
       "SalmonGums              Hot dry summer, cool winter\n",
       "Newcastle                         Warm temperate   \n",
       "CoffsHarbour         Warm humid summer, mild winter\n",
       "Witchcliffe                       Warm temperate   \n",
       "Richmond                          Mild temperate   \n",
       "Dartmoor                          Mild temperate   \n",
       "NorahHead                         Cool temperate   \n",
       "BadgerysCreek                     Warm temperate   \n",
       "MountGinini                       Cool temperate   \n",
       "Moree                   Hot dry summer, warm winter\n",
       "Walpole                           Mild temperate   \n",
       "PearceRAAF                        Warm temperate   \n",
       "Williamtown                       Warm temperate   \n",
       "Melbourne                         Mild temperate   \n",
       "Nhil                              Mild temperate   \n",
       "Katherine         High humidity summer, warm winter\n",
       "Uluru                   Hot dry summer, warm winter"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "locafinal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the Location -> Climate table to CSV.\n",
    "# NOTE(review): the destination is an absolute, machine-specific Windows path;\n",
    "# consider deriving it from a configurable data directory.\n",
    "locafinal.to_csv(r\"C:\\work\\learnbetter\\micro-class\\week 8 SVM (2)\\samplelocation.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "      <td>Katherine</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12</td>\n",
       "      <td>Tuggeranong</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>Albany</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>11</td>\n",
       "      <td>Sale</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Mildura</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>NNE</td>\n",
       "      <td>24.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0      8    Katherine     17.5     36.0       0.0          8.8       NaN   \n",
       "1     12  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n",
       "2      4       Albany     13.0     22.6       0.0          3.8      10.4   \n",
       "3     11         Sale     13.9     29.8       0.0          5.8       5.1   \n",
       "4      4      Mildura      6.0     23.5       0.0          2.8       8.6   \n",
       "\n",
       "  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n",
       "0         ESE           26.0        NNW    ...             15.0         57.0   \n",
       "1         NNW           33.0         NE    ...             17.0         59.0   \n",
       "2         NaN            NaN         NE    ...             31.0         79.0   \n",
       "3           S           37.0          N    ...             28.0         82.0   \n",
       "4         NNE           24.0          E    ...             15.0         58.0   \n",
       "\n",
       "   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n",
       "0          NaN       1016.8       1012.2       0.0       NaN     27.5   \n",
       "1         31.0       1020.4       1017.5       NaN       NaN     14.6   \n",
       "2         68.0       1020.3       1015.7       1.0       3.0     17.5   \n",
       "3         44.0       1012.5       1005.9       6.0       6.0     18.5   \n",
       "4         35.0       1019.8       1014.1       2.0       4.0     12.4   \n",
       "\n",
       "   Temp3pm  RainToday  \n",
       "0      NaN         No  \n",
       "1     23.6         No  \n",
       "2     20.8         No  \n",
       "3     27.5         No  \n",
       "4     22.4         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reminder of what the training set looks like.\n",
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace the contents of Location, making sure the climate strings that are matched in\n",
    "# contain no commas and have no spaces on either side.\n",
    "# The re module is used to remove the commas:\n",
    "#   re.sub(value_to_replace, replacement_value, string_to_operate_on)  # removes the commas\n",
    "# x.strip() is the function that removes the surrounding spaces.\n",
    "# Turning Location into the climate itself is done with a map lookup.\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Swap each weather-station name for the climate of its matched city.\n",
    "# The first (only) column of locafinal serves as the lookup table for map().\n",
    "# NOTE(review): this overwrites Location in place, so running the cell a second time\n",
    "# feeds the climate strings back through the lookup -- confirm before re-executing.\n",
    "station_to_climate = locafinal.iloc[:, 0]\n",
    "Xtrain[\"Location\"] = Xtrain[\"Location\"].map(station_to_climate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Location</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "      <td>High humidity summer, warm winter</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12</td>\n",
       "      <td>Cool temperate</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>Mild temperate</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>11</td>\n",
       "      <td>Mild temperate</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Hot dry summer, cool winter</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>NNE</td>\n",
       "      <td>24.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month                           Location  MinTemp  MaxTemp  Rainfall  \\\n",
       "0      8  High humidity summer, warm winter     17.5     36.0       0.0   \n",
       "1     12                  Cool temperate         9.5     25.0       0.0   \n",
       "2      4                  Mild temperate        13.0     22.6       0.0   \n",
       "3     11                  Mild temperate        13.9     29.8       0.0   \n",
       "4      4        Hot dry summer, cool winter      6.0     23.5       0.0   \n",
       "\n",
       "   Evaporation  Sunshine WindGustDir  WindGustSpeed WindDir9am    ...      \\\n",
       "0          8.8       NaN         ESE           26.0        NNW    ...       \n",
       "1          NaN       NaN         NNW           33.0         NE    ...       \n",
       "2          3.8      10.4         NaN            NaN         NE    ...       \n",
       "3          5.8       5.1           S           37.0          N    ...       \n",
       "4          2.8       8.6         NNE           24.0          E    ...       \n",
       "\n",
       "  WindSpeed3pm  Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  \\\n",
       "0         15.0         57.0          NaN       1016.8       1012.2       0.0   \n",
       "1         17.0         59.0         31.0       1020.4       1017.5       NaN   \n",
       "2         31.0         79.0         68.0       1020.3       1015.7       1.0   \n",
       "3         28.0         82.0         44.0       1012.5       1005.9       6.0   \n",
       "4         15.0         58.0         35.0       1019.8       1014.1       2.0   \n",
       "\n",
       "   Cloud3pm  Temp9am  Temp3pm  RainToday  \n",
       "0       NaN     27.5      NaN         No  \n",
       "1       NaN     14.6     23.6         No  \n",
       "2       3.0     17.5     20.8         No  \n",
       "3       6.0     18.5     27.5         No  \n",
       "4       4.0     12.4     22.4         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "#城市的气候中所含的逗号和空格都去掉\n",
    "Xtrain[\"Location\"] = Xtrain[\"Location\"].apply(lambda x:re.sub(\",\",\"\",x.strip()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "Xtest[\"Location\"] = Xtest[\"Location\"].map(locafinal.iloc[:,0]).apply(lambda x:re.sub(\",\",\"\",x.strip()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "#修改特征内容之后，我们使用新列名“Climate”来替换之前的列名“Location”\n",
    "#注意这个命令一旦执行之后，就再没有列\"Location\"了，使用索引时要特别注意\n",
    "Xtrain = Xtrain.rename(columns={\"Location\":\"Climate\"})\n",
    "Xtest = Xtest.rename(columns={\"Location\":\"Climate\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Climate</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "      <td>High humidity summer warm winter</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12</td>\n",
       "      <td>Cool temperate</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>Mild temperate</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>11</td>\n",
       "      <td>Mild temperate</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Hot dry summer cool winter</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>NNE</td>\n",
       "      <td>24.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month                           Climate  MinTemp  MaxTemp  Rainfall  \\\n",
       "0      8  High humidity summer warm winter     17.5     36.0       0.0   \n",
       "1     12                    Cool temperate      9.5     25.0       0.0   \n",
       "2      4                    Mild temperate     13.0     22.6       0.0   \n",
       "3     11                    Mild temperate     13.9     29.8       0.0   \n",
       "4      4        Hot dry summer cool winter      6.0     23.5       0.0   \n",
       "\n",
       "   Evaporation  Sunshine WindGustDir  WindGustSpeed WindDir9am    ...      \\\n",
       "0          8.8       NaN         ESE           26.0        NNW    ...       \n",
       "1          NaN       NaN         NNW           33.0         NE    ...       \n",
       "2          3.8      10.4         NaN            NaN         NE    ...       \n",
       "3          5.8       5.1           S           37.0          N    ...       \n",
       "4          2.8       8.6         NNE           24.0          E    ...       \n",
       "\n",
       "  WindSpeed3pm  Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  \\\n",
       "0         15.0         57.0          NaN       1016.8       1012.2       0.0   \n",
       "1         17.0         59.0         31.0       1020.4       1017.5       NaN   \n",
       "2         31.0         79.0         68.0       1020.3       1015.7       1.0   \n",
       "3         28.0         82.0         44.0       1012.5       1005.9       6.0   \n",
       "4         15.0         58.0         35.0       1019.8       1014.1       2.0   \n",
       "\n",
       "   Cloud3pm  Temp9am  Temp3pm  RainToday  \n",
       "0       NaN     27.5      NaN         No  \n",
       "1       NaN     14.6     23.6         No  \n",
       "2       3.0     17.5     20.8         No  \n",
       "3       6.0     18.5     27.5         No  \n",
       "4       4.0     12.4     22.4         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Climate</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Cool temperate</td>\n",
       "      <td>22.0</td>\n",
       "      <td>27.8</td>\n",
       "      <td>25.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SSW</td>\n",
       "      <td>57.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>37.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>1006.6</td>\n",
       "      <td>1008.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>26.2</td>\n",
       "      <td>23.1</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>Mild temperate</td>\n",
       "      <td>12.0</td>\n",
       "      <td>18.6</td>\n",
       "      <td>2.2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7.8</td>\n",
       "      <td>SW</td>\n",
       "      <td>52.0</td>\n",
       "      <td>SW</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>1020.2</td>\n",
       "      <td>1019.9</td>\n",
       "      <td>8.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>14.8</td>\n",
       "      <td>17.5</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Cool temperate</td>\n",
       "      <td>9.1</td>\n",
       "      <td>13.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>41.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10</td>\n",
       "      <td>Warm temperate</td>\n",
       "      <td>13.1</td>\n",
       "      <td>20.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>W</td>\n",
       "      <td>...</td>\n",
       "      <td>24.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>51.0</td>\n",
       "      <td>1021.3</td>\n",
       "      <td>1019.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16.8</td>\n",
       "      <td>19.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11</td>\n",
       "      <td>Mild temperate</td>\n",
       "      <td>12.2</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E</td>\n",
       "      <td>33.0</td>\n",
       "      <td>SW</td>\n",
       "      <td>...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>92.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>1015.6</td>\n",
       "      <td>1013.2</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>13.6</td>\n",
       "      <td>19.0</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month         Climate  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0      1  Cool temperate     22.0     27.8      25.2          NaN       NaN   \n",
       "1      3  Mild temperate     12.0     18.6       2.2          3.0       7.8   \n",
       "2      3  Cool temperate      9.1     13.3       NaN          NaN       NaN   \n",
       "3     10  Warm temperate     13.1     20.3       0.0          NaN       NaN   \n",
       "4     11  Mild temperate     12.2     20.0       0.4          NaN       NaN   \n",
       "\n",
       "  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n",
       "0         SSW           57.0          S    ...             37.0         91.0   \n",
       "1          SW           52.0         SW    ...             28.0         88.0   \n",
       "2          NE           41.0        NaN    ...              NaN          NaN   \n",
       "3          SW           33.0          W    ...             24.0         40.0   \n",
       "4           E           33.0         SW    ...             19.0         92.0   \n",
       "\n",
       "   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n",
       "0         86.0       1006.6       1008.1       NaN       NaN     26.2   \n",
       "1         62.0       1020.2       1019.9       8.0       7.0     14.8   \n",
       "2          NaN          NaN          NaN       NaN       NaN      NaN   \n",
       "3         51.0       1021.3       1019.5       NaN       NaN     16.8   \n",
       "4         69.0       1015.6       1013.2       8.0       4.0     13.6   \n",
       "\n",
       "   Temp3pm  RainToday  \n",
       "0     23.1        Yes  \n",
       "1     17.5        Yes  \n",
       "2      NaN        NaN  \n",
       "3     19.6         No  \n",
       "4     19.0         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtest.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Month            0.000000\n",
       "Climate          0.000000\n",
       "MinTemp          0.004000\n",
       "MaxTemp          0.003143\n",
       "Rainfall         0.009429\n",
       "Evaporation      0.433429\n",
       "Sunshine         0.488571\n",
       "WindGustDir      0.067714\n",
       "WindGustSpeed    0.067714\n",
       "WindDir9am       0.067429\n",
       "WindDir3pm       0.024286\n",
       "WindSpeed9am     0.009714\n",
       "WindSpeed3pm     0.018000\n",
       "Humidity9am      0.011714\n",
       "Humidity3pm      0.026286\n",
       "Pressure9am      0.098857\n",
       "Pressure3pm      0.098857\n",
       "Cloud9am         0.379714\n",
       "Cloud3pm         0.401429\n",
       "Temp9am          0.005429\n",
       "Temp3pm          0.019714\n",
       "RainToday        0.009429\n",
       "dtype: float64"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看缺失值的缺失情况\n",
    "Xtrain.isnull().mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 3500 entries, 0 to 3499\n",
      "Data columns (total 22 columns):\n",
      "Month            3500 non-null int64\n",
      "Climate          3500 non-null object\n",
      "MinTemp          3486 non-null float64\n",
      "MaxTemp          3489 non-null float64\n",
      "Rainfall         3467 non-null float64\n",
      "Evaporation      1983 non-null float64\n",
      "Sunshine         1790 non-null float64\n",
      "WindGustDir      3263 non-null object\n",
      "WindGustSpeed    3263 non-null float64\n",
      "WindDir9am       3264 non-null object\n",
      "WindDir3pm       3415 non-null object\n",
      "WindSpeed9am     3466 non-null float64\n",
      "WindSpeed3pm     3437 non-null float64\n",
      "Humidity9am      3459 non-null float64\n",
      "Humidity3pm      3408 non-null float64\n",
      "Pressure9am      3154 non-null float64\n",
      "Pressure3pm      3154 non-null float64\n",
      "Cloud9am         2171 non-null float64\n",
      "Cloud3pm         2095 non-null float64\n",
      "Temp9am          3481 non-null float64\n",
      "Temp3pm          3431 non-null float64\n",
      "RainToday        3467 non-null object\n",
      "dtypes: float64(16), int64(1), object(5)\n",
      "memory usage: 601.6+ KB\n"
     ]
    }
   ],
   "source": [
    "Xtrain.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Month            False\n",
       "Climate           True\n",
       "MinTemp          False\n",
       "MaxTemp          False\n",
       "Rainfall         False\n",
       "Evaporation      False\n",
       "Sunshine         False\n",
       "WindGustDir       True\n",
       "WindGustSpeed    False\n",
       "WindDir9am        True\n",
       "WindDir3pm        True\n",
       "WindSpeed9am     False\n",
       "WindSpeed3pm     False\n",
       "Humidity9am      False\n",
       "Humidity3pm      False\n",
       "Pressure9am      False\n",
       "Pressure3pm      False\n",
       "Cloud9am         False\n",
       "Cloud3pm         False\n",
       "Temp9am          False\n",
       "Temp3pm          False\n",
       "RainToday         True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.dtypes == \"object\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "#首先找出，分类型特征都有哪些\n",
    "cate = Xtrain.columns[Xtrain.dtypes == \"object\"].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Climate', 'WindGustDir', 'WindDir9am', 'WindDir3pm', 'RainToday']"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "#除了特征类型为\"object\"的特征们，还有虽然用数字表示，但是本质为分类型特征的云层遮蔽程度\n",
    "cloud = [\"Cloud9am\",\"Cloud3pm\"]\n",
    "cate = cate + cloud"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Climate',\n",
       " 'WindGustDir',\n",
       " 'WindDir9am',\n",
       " 'WindDir3pm',\n",
       " 'RainToday',\n",
       " 'Cloud9am',\n",
       " 'Cloud3pm']"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SimpleImputer(copy=True, fill_value=None, missing_values=nan,\n",
       "       strategy='most_frequent', verbose=0)"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对于分类型特征，我们使用众数来进行填补\n",
    "from sklearn.impute import SimpleImputer #0.20, conda, pip\n",
    "\n",
    "si = SimpleImputer(missing_values=np.nan,strategy=\"most_frequent\")\n",
    "#注意，我们使用训练集数据来训练我们的填补器，本质是在生成训练集中的众数\n",
    "si.fit(Xtrain.loc[:,cate])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "#然后我们用训练集中的众数来同时填补训练集和测试集\n",
    "Xtrain.loc[:,cate] = si.transform(Xtrain.loc[:,cate])\n",
    "Xtest.loc[:,cate] = si.transform(Xtest.loc[:,cate])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Climate</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "      <td>High humidity summer warm winter</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ESE</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NNW</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12</td>\n",
       "      <td>Cool temperate</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NNW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>Mild temperate</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>W</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>...</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>11</td>\n",
       "      <td>Mild temperate</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>S</td>\n",
       "      <td>37.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Hot dry summer cool winter</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>NNE</td>\n",
       "      <td>24.0</td>\n",
       "      <td>E</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month                           Climate  MinTemp  MaxTemp  Rainfall  \\\n",
       "0      8  High humidity summer warm winter     17.5     36.0       0.0   \n",
       "1     12                    Cool temperate      9.5     25.0       0.0   \n",
       "2      4                    Mild temperate     13.0     22.6       0.0   \n",
       "3     11                    Mild temperate     13.9     29.8       0.0   \n",
       "4      4        Hot dry summer cool winter      6.0     23.5       0.0   \n",
       "\n",
       "   Evaporation  Sunshine WindGustDir  WindGustSpeed WindDir9am    ...      \\\n",
       "0          8.8       NaN         ESE           26.0        NNW    ...       \n",
       "1          NaN       NaN         NNW           33.0         NE    ...       \n",
       "2          3.8      10.4           W            NaN         NE    ...       \n",
       "3          5.8       5.1           S           37.0          N    ...       \n",
       "4          2.8       8.6         NNE           24.0          E    ...       \n",
       "\n",
       "  WindSpeed3pm  Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  \\\n",
       "0         15.0         57.0          NaN       1016.8       1012.2       0.0   \n",
       "1         17.0         59.0         31.0       1020.4       1017.5       7.0   \n",
       "2         31.0         79.0         68.0       1020.3       1015.7       1.0   \n",
       "3         28.0         82.0         44.0       1012.5       1005.9       6.0   \n",
       "4         15.0         58.0         35.0       1019.8       1014.1       2.0   \n",
       "\n",
       "   Cloud3pm  Temp9am  Temp3pm  RainToday  \n",
       "0       7.0     27.5      NaN         No  \n",
       "1       7.0     14.6     23.6         No  \n",
       "2       3.0     17.5     20.8         No  \n",
       "3       6.0     18.5     27.5         No  \n",
       "4       4.0     12.4     22.4         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Climate</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Cool temperate</td>\n",
       "      <td>22.0</td>\n",
       "      <td>27.8</td>\n",
       "      <td>25.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SSW</td>\n",
       "      <td>57.0</td>\n",
       "      <td>S</td>\n",
       "      <td>...</td>\n",
       "      <td>37.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>1006.6</td>\n",
       "      <td>1008.1</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>26.2</td>\n",
       "      <td>23.1</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>Mild temperate</td>\n",
       "      <td>12.0</td>\n",
       "      <td>18.6</td>\n",
       "      <td>2.2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7.8</td>\n",
       "      <td>SW</td>\n",
       "      <td>52.0</td>\n",
       "      <td>SW</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>1020.2</td>\n",
       "      <td>1019.9</td>\n",
       "      <td>8.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>14.8</td>\n",
       "      <td>17.5</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Cool temperate</td>\n",
       "      <td>9.1</td>\n",
       "      <td>13.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NE</td>\n",
       "      <td>41.0</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10</td>\n",
       "      <td>Warm temperate</td>\n",
       "      <td>13.1</td>\n",
       "      <td>20.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SW</td>\n",
       "      <td>33.0</td>\n",
       "      <td>W</td>\n",
       "      <td>...</td>\n",
       "      <td>24.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>51.0</td>\n",
       "      <td>1021.3</td>\n",
       "      <td>1019.5</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>16.8</td>\n",
       "      <td>19.6</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11</td>\n",
       "      <td>Mild temperate</td>\n",
       "      <td>12.2</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E</td>\n",
       "      <td>33.0</td>\n",
       "      <td>SW</td>\n",
       "      <td>...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>92.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>1015.6</td>\n",
       "      <td>1013.2</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>13.6</td>\n",
       "      <td>19.0</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month         Climate  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0      1  Cool temperate     22.0     27.8      25.2          NaN       NaN   \n",
       "1      3  Mild temperate     12.0     18.6       2.2          3.0       7.8   \n",
       "2      3  Cool temperate      9.1     13.3       NaN          NaN       NaN   \n",
       "3     10  Warm temperate     13.1     20.3       0.0          NaN       NaN   \n",
       "4     11  Mild temperate     12.2     20.0       0.4          NaN       NaN   \n",
       "\n",
       "  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n",
       "0         SSW           57.0          S    ...             37.0         91.0   \n",
       "1          SW           52.0         SW    ...             28.0         88.0   \n",
       "2          NE           41.0          N    ...              NaN          NaN   \n",
       "3          SW           33.0          W    ...             24.0         40.0   \n",
       "4           E           33.0         SW    ...             19.0         92.0   \n",
       "\n",
       "   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n",
       "0         86.0       1006.6       1008.1       7.0       7.0     26.2   \n",
       "1         62.0       1020.2       1019.9       8.0       7.0     14.8   \n",
       "2          NaN          NaN          NaN       7.0       7.0      NaN   \n",
       "3         51.0       1021.3       1019.5       7.0       7.0     16.8   \n",
       "4         69.0       1015.6       1013.2       8.0       4.0     13.6   \n",
       "\n",
       "   Temp3pm  RainToday  \n",
       "0     23.1        Yes  \n",
       "1     17.5        Yes  \n",
       "2      NaN         No  \n",
       "3     19.6         No  \n",
       "4     19.0         No  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtest.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Climate        0.0\n",
       "WindGustDir    0.0\n",
       "WindDir9am     0.0\n",
       "WindDir3pm     0.0\n",
       "RainToday      0.0\n",
       "Cloud9am       0.0\n",
       "Cloud3pm       0.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check whether the categorical features still contain missing values\n",
    "Xtrain.loc[:,cate].isnull().mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Climate        0.0\n",
       "WindGustDir    0.0\n",
       "WindDir9am     0.0\n",
       "WindDir3pm     0.0\n",
       "RainToday      0.0\n",
       "Cloud9am       0.0\n",
       "Cloud3pm       0.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtest.loc[:,cate].isnull().mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode every categorical feature as numbers, one number per category.\n",
    "# OrdinalEncoder only accepts input that is at least two-dimensional.\n",
    "from sklearn.preprocessing import OrdinalEncoder\n",
    "\n",
    "oe = OrdinalEncoder()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit the encoder on the categorical columns of the training set\n",
    "oe = oe.fit(Xtrain[cate])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode the training and test feature matrices with the mapping fitted on the training set.\n",
    "# If transforming the test matrix raises an error here, the test set contains a category\n",
    "# that never appeared in the training set.\n",
    "# Assign through df[cols] rather than df.loc[:, cols]: pandas >= 2.0 writes .loc[:, cols]\n",
    "# values into the existing object columns, so the encoded columns would keep dtype object.\n",
    "Xtrain[cate] = oe.transform(Xtrain[cate])\n",
    "Xtest[cate] = oe.transform(Xtest[cate])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Climate',\n",
       " 'WindGustDir',\n",
       " 'WindDir9am',\n",
       " 'WindDir3pm',\n",
       " 'RainToday',\n",
       " 'Cloud9am',\n",
       " 'Cloud3pm']"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Climate</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>WindDir3pm</th>\n",
       "      <th>RainToday</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Climate  WindGustDir  WindDir9am  WindDir3pm  RainToday  Cloud9am  Cloud3pm\n",
       "0      1.0          2.0         6.0         0.0        0.0       0.0       7.0\n",
       "1      0.0          6.0         4.0         6.0        0.0       7.0       7.0\n",
       "2      4.0         13.0         4.0         0.0        0.0       1.0       3.0\n",
       "3      4.0          8.0         3.0         8.0        0.0       6.0       6.0\n",
       "4      2.0          5.0         0.0         6.0        0.0       2.0       4.0"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.loc[:,cate].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Climate</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>WindDir3pm</th>\n",
       "      <th>RainToday</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Climate  WindGustDir  WindDir9am  WindDir3pm  RainToday  Cloud9am  Cloud3pm\n",
       "0      0.0         11.0         8.0        11.0        1.0       7.0       7.0\n",
       "1      4.0         12.0        12.0         8.0        1.0       8.0       7.0\n",
       "2      0.0          4.0         3.0         9.0        0.0       7.0       7.0\n",
       "3      6.0         12.0        13.0         9.0        0.0       7.0       7.0\n",
       "4      4.0          0.0        12.0         0.0        0.0       8.0       4.0"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtest.loc[:,cate].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Climate</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "      <td>1.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1016.8</td>\n",
       "      <td>1012.2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>27.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>33.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>1020.4</td>\n",
       "      <td>1017.5</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>14.6</td>\n",
       "      <td>23.6</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>22.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10.4</td>\n",
       "      <td>13.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>31.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1020.3</td>\n",
       "      <td>1015.7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>17.5</td>\n",
       "      <td>20.8</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>11</td>\n",
       "      <td>4.0</td>\n",
       "      <td>13.9</td>\n",
       "      <td>29.8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>28.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>1012.5</td>\n",
       "      <td>1005.9</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>27.5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>23.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>8.6</td>\n",
       "      <td>5.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1019.8</td>\n",
       "      <td>1014.1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>22.4</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month  Climate  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0      8      1.0     17.5     36.0       0.0          8.8       NaN   \n",
       "1     12      0.0      9.5     25.0       0.0          NaN       NaN   \n",
       "2      4      4.0     13.0     22.6       0.0          3.8      10.4   \n",
       "3     11      4.0     13.9     29.8       0.0          5.8       5.1   \n",
       "4      4      2.0      6.0     23.5       0.0          2.8       8.6   \n",
       "\n",
       "   WindGustDir  WindGustSpeed  WindDir9am    ...      WindSpeed3pm  \\\n",
       "0          2.0           26.0         6.0    ...              15.0   \n",
       "1          6.0           33.0         4.0    ...              17.0   \n",
       "2         13.0            NaN         4.0    ...              31.0   \n",
       "3          8.0           37.0         3.0    ...              28.0   \n",
       "4          5.0           24.0         0.0    ...              15.0   \n",
       "\n",
       "   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n",
       "0         57.0          NaN       1016.8       1012.2       0.0       7.0   \n",
       "1         59.0         31.0       1020.4       1017.5       7.0       7.0   \n",
       "2         79.0         68.0       1020.3       1015.7       1.0       3.0   \n",
       "3         82.0         44.0       1012.5       1005.9       6.0       6.0   \n",
       "4         58.0         35.0       1019.8       1014.1       2.0       4.0   \n",
       "\n",
       "   Temp9am  Temp3pm  RainToday  \n",
       "0     27.5      NaN        0.0  \n",
       "1     14.6     23.6        0.0  \n",
       "2     17.5     20.8        0.0  \n",
       "3     18.5     27.5        0.0  \n",
       "4     12.4     22.4        0.0  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start from the name of every feature column; later cells narrow this list down\n",
    "col = list(Xtrain.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Month',\n",
       " 'Climate',\n",
       " 'MinTemp',\n",
       " 'MaxTemp',\n",
       " 'Rainfall',\n",
       " 'Evaporation',\n",
       " 'Sunshine',\n",
       " 'WindGustDir',\n",
       " 'WindGustSpeed',\n",
       " 'WindDir9am',\n",
       " 'WindDir3pm',\n",
       " 'WindSpeed9am',\n",
       " 'WindSpeed3pm',\n",
       " 'Humidity9am',\n",
       " 'Humidity3pm',\n",
       " 'Pressure9am',\n",
       " 'Pressure3pm',\n",
       " 'Cloud9am',\n",
       " 'Cloud3pm',\n",
       " 'Temp9am',\n",
       " 'Temp3pm',\n",
       " 'RainToday']"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Climate',\n",
       " 'WindGustDir',\n",
       " 'WindDir9am',\n",
       " 'WindDir3pm',\n",
       " 'RainToday',\n",
       " 'Cloud9am',\n",
       " 'Cloud3pm']"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the column names that are not in cate.\n",
    "# A filtering comprehension, unlike list.remove, does not raise ValueError\n",
    "# when this cell is run a second time.\n",
    "col = [name for name in col if name not in cate]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Month',\n",
       " 'MinTemp',\n",
       " 'MaxTemp',\n",
       " 'Rainfall',\n",
       " 'Evaporation',\n",
       " 'Sunshine',\n",
       " 'WindGustSpeed',\n",
       " 'WindSpeed9am',\n",
       " 'WindSpeed3pm',\n",
       " 'Humidity9am',\n",
       " 'Humidity3pm',\n",
       " 'Pressure9am',\n",
       " 'Pressure3pm',\n",
       " 'Temp9am',\n",
       " 'Temp3pm']"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mean imputation for the columns listed in col; the fill values are fitted on the training set\n",
    "impmean = SimpleImputer(strategy=\"mean\", missing_values=np.nan).fit(Xtrain[col])\n",
    "# Fill both the training set and the test set with those training-set means\n",
    "Xtrain.loc[:, col] = impmean.transform(Xtrain[col])\n",
    "Xtest.loc[:, col] = impmean.transform(Xtest[col])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Month            0.0\n",
       "Climate          0.0\n",
       "MinTemp          0.0\n",
       "MaxTemp          0.0\n",
       "Rainfall         0.0\n",
       "Evaporation      0.0\n",
       "Sunshine         0.0\n",
       "WindGustDir      0.0\n",
       "WindGustSpeed    0.0\n",
       "WindDir9am       0.0\n",
       "WindDir3pm       0.0\n",
       "WindSpeed9am     0.0\n",
       "WindSpeed3pm     0.0\n",
       "Humidity9am      0.0\n",
       "Humidity3pm      0.0\n",
       "Pressure9am      0.0\n",
       "Pressure3pm      0.0\n",
       "Cloud9am         0.0\n",
       "Cloud3pm         0.0\n",
       "Temp9am          0.0\n",
       "Temp3pm          0.0\n",
       "RainToday        0.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.isnull().mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Month            0.0\n",
       "Climate          0.0\n",
       "MinTemp          0.0\n",
       "MaxTemp          0.0\n",
       "Rainfall         0.0\n",
       "Evaporation      0.0\n",
       "Sunshine         0.0\n",
       "WindGustDir      0.0\n",
       "WindGustSpeed    0.0\n",
       "WindDir9am       0.0\n",
       "WindDir3pm       0.0\n",
       "WindSpeed9am     0.0\n",
       "WindSpeed3pm     0.0\n",
       "Humidity9am      0.0\n",
       "Humidity3pm      0.0\n",
       "Pressure9am      0.0\n",
       "Pressure3pm      0.0\n",
       "Cloud9am         0.0\n",
       "Cloud3pm         0.0\n",
       "Temp9am          0.0\n",
       "Temp3pm          0.0\n",
       "RainToday        0.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtest.isnull().mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop Month from col so that it is left out of the standardization step that uses col.\n",
    "# Filtering instead of list.remove keeps the cell safe to re-run (no ValueError once Month is gone).\n",
    "col = [name for name in col if name != \"Month\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['MinTemp',\n",
       " 'MaxTemp',\n",
       " 'Rainfall',\n",
       " 'Evaporation',\n",
       " 'Sunshine',\n",
       " 'WindGustSpeed',\n",
       " 'WindSpeed9am',\n",
       " 'WindSpeed3pm',\n",
       " 'Humidity9am',\n",
       " 'Humidity3pm',\n",
       " 'Pressure9am',\n",
       " 'Pressure3pm',\n",
       " 'Temp9am',\n",
       " 'Temp3pm']"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "# StandardScaler rescales the data to mean 0 and variance 1.\n",
    "# Standardization does not change the distribution of the data; it does not make it normal.\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardize the columns listed in col; the scaler is fitted on the training set only\n",
    "ss = StandardScaler().fit(Xtrain[col])\n",
    "Xtrain.loc[:, col] = ss.transform(Xtrain[col])\n",
    "Xtest.loc[:, col] = ss.transform(Xtest[col])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Climate</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.826375</td>\n",
       "      <td>1.774044</td>\n",
       "      <td>-0.314379</td>\n",
       "      <td>0.964367</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.0</td>\n",
       "      <td>-1.085893e+00</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.416443</td>\n",
       "      <td>-0.646283</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.122589</td>\n",
       "      <td>-0.453507</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.612270</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.427048</td>\n",
       "      <td>0.244031</td>\n",
       "      <td>-0.314379</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.0</td>\n",
       "      <td>-5.373993e-01</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.182051</td>\n",
       "      <td>-0.539186</td>\n",
       "      <td>-1.011310</td>\n",
       "      <td>0.414254</td>\n",
       "      <td>0.340522</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>-0.366608</td>\n",
       "      <td>0.270238</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.121324</td>\n",
       "      <td>-0.089790</td>\n",
       "      <td>-0.314379</td>\n",
       "      <td>-0.551534</td>\n",
       "      <td>1.062619</td>\n",
       "      <td>13.0</td>\n",
       "      <td>-1.113509e-15</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.458692</td>\n",
       "      <td>0.531786</td>\n",
       "      <td>0.800547</td>\n",
       "      <td>0.399342</td>\n",
       "      <td>0.070852</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.078256</td>\n",
       "      <td>-0.132031</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>11.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.262334</td>\n",
       "      <td>0.911673</td>\n",
       "      <td>-0.314379</td>\n",
       "      <td>0.054826</td>\n",
       "      <td>-0.885225</td>\n",
       "      <td>8.0</td>\n",
       "      <td>-2.239744e-01</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.107105</td>\n",
       "      <td>0.692432</td>\n",
       "      <td>-0.374711</td>\n",
       "      <td>-0.763819</td>\n",
       "      <td>-1.397352</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.231658</td>\n",
       "      <td>0.830540</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>-0.975421</td>\n",
       "      <td>0.035393</td>\n",
       "      <td>-0.314379</td>\n",
       "      <td>-0.854715</td>\n",
       "      <td>0.401087</td>\n",
       "      <td>5.0</td>\n",
       "      <td>-1.242605e+00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.416443</td>\n",
       "      <td>-0.592734</td>\n",
       "      <td>-0.815433</td>\n",
       "      <td>0.324780</td>\n",
       "      <td>-0.168855</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>-0.704091</td>\n",
       "      <td>0.097837</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month  Climate   MinTemp   MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0    8.0      1.0  0.826375  1.774044 -0.314379     0.964367  0.000000   \n",
       "1   12.0      0.0 -0.427048  0.244031 -0.314379     0.000000  0.000000   \n",
       "2    4.0      4.0  0.121324 -0.089790 -0.314379    -0.551534  1.062619   \n",
       "3   11.0      4.0  0.262334  0.911673 -0.314379     0.054826 -0.885225   \n",
       "4    4.0      2.0 -0.975421  0.035393 -0.314379    -0.854715  0.401087   \n",
       "\n",
       "   WindGustDir  WindGustSpeed  WindDir9am    ...      WindSpeed3pm  \\\n",
       "0          2.0  -1.085893e+00         6.0    ...         -0.416443   \n",
       "1          6.0  -5.373993e-01         4.0    ...         -0.182051   \n",
       "2         13.0  -1.113509e-15         4.0    ...          1.458692   \n",
       "3          8.0  -2.239744e-01         3.0    ...          1.107105   \n",
       "4          5.0  -1.242605e+00         0.0    ...         -0.416443   \n",
       "\n",
       "   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n",
       "0    -0.646283     0.000000    -0.122589    -0.453507       0.0       7.0   \n",
       "1    -0.539186    -1.011310     0.414254     0.340522       7.0       7.0   \n",
       "2     0.531786     0.800547     0.399342     0.070852       1.0       3.0   \n",
       "3     0.692432    -0.374711    -0.763819    -1.397352       6.0       6.0   \n",
       "4    -0.592734    -0.815433     0.324780    -0.168855       2.0       4.0   \n",
       "\n",
       "    Temp9am   Temp3pm  RainToday  \n",
       "0  1.612270  0.000000        0.0  \n",
       "1 -0.366608  0.270238        0.0  \n",
       "2  0.078256 -0.132031        0.0  \n",
       "3  0.231658  0.830540        0.0  \n",
       "4 -0.704091  0.097837        0.0  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Climate</th>\n",
       "      <th>MinTemp</th>\n",
       "      <th>MaxTemp</th>\n",
       "      <th>Rainfall</th>\n",
       "      <th>Evaporation</th>\n",
       "      <th>Sunshine</th>\n",
       "      <th>WindGustDir</th>\n",
       "      <th>WindGustSpeed</th>\n",
       "      <th>WindDir9am</th>\n",
       "      <th>...</th>\n",
       "      <th>WindSpeed3pm</th>\n",
       "      <th>Humidity9am</th>\n",
       "      <th>Humidity3pm</th>\n",
       "      <th>Pressure9am</th>\n",
       "      <th>Pressure3pm</th>\n",
       "      <th>Cloud9am</th>\n",
       "      <th>Cloud3pm</th>\n",
       "      <th>Temp9am</th>\n",
       "      <th>Temp3pm</th>\n",
       "      <th>RainToday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.531425</td>\n",
       "      <td>0.633489</td>\n",
       "      <td>2.871067</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1.343150</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.161868e+00</td>\n",
       "      <td>1.174369</td>\n",
       "      <td>1.681991</td>\n",
       "      <td>-1.643646</td>\n",
       "      <td>-1.067755</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.412848</td>\n",
       "      <td>0.198404</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>-0.035354</td>\n",
       "      <td>-0.646158</td>\n",
       "      <td>-0.036285</td>\n",
       "      <td>-0.794079</td>\n",
       "      <td>0.107073</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.951369</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.107105e+00</td>\n",
       "      <td>1.013723</td>\n",
       "      <td>0.506733</td>\n",
       "      <td>0.384430</td>\n",
       "      <td>0.700082</td>\n",
       "      <td>8.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>-0.335927</td>\n",
       "      <td>-0.606132</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.489720</td>\n",
       "      <td>-1.383346</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.089450</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-4.163637e-16</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.136992</td>\n",
       "      <td>-0.409702</td>\n",
       "      <td>-0.314379</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>12.0</td>\n",
       "      <td>-0.537399</td>\n",
       "      <td>13.0</td>\n",
       "      <td>...</td>\n",
       "      <td>6.383207e-01</td>\n",
       "      <td>-1.556609</td>\n",
       "      <td>-0.031928</td>\n",
       "      <td>0.548465</td>\n",
       "      <td>0.640155</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>-0.029125</td>\n",
       "      <td>-0.304431</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>-0.004018</td>\n",
       "      <td>-0.451429</td>\n",
       "      <td>-0.263817</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.537399</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>5.234093e-02</td>\n",
       "      <td>1.227917</td>\n",
       "      <td>0.849516</td>\n",
       "      <td>-0.301537</td>\n",
       "      <td>-0.303690</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>-0.520009</td>\n",
       "      <td>-0.390632</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month  Climate   MinTemp   MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
       "0    1.0      0.0  1.531425  0.633489  2.871067     0.000000  0.000000   \n",
       "1    3.0      4.0 -0.035354 -0.646158 -0.036285    -0.794079  0.107073   \n",
       "2    3.0      0.0 -0.489720 -1.383346  0.000000     0.000000  0.000000   \n",
       "3   10.0      6.0  0.136992 -0.409702 -0.314379     0.000000  0.000000   \n",
       "4   11.0      4.0 -0.004018 -0.451429 -0.263817     0.000000  0.000000   \n",
       "\n",
       "   WindGustDir  WindGustSpeed  WindDir9am    ...      WindSpeed3pm  \\\n",
       "0         11.0       1.343150         8.0    ...      2.161868e+00   \n",
       "1         12.0       0.951369        12.0    ...      1.107105e+00   \n",
       "2          4.0       0.089450         3.0    ...     -4.163637e-16   \n",
       "3         12.0      -0.537399        13.0    ...      6.383207e-01   \n",
       "4          0.0      -0.537399        12.0    ...      5.234093e-02   \n",
       "\n",
       "   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n",
       "0     1.174369     1.681991    -1.643646    -1.067755       7.0       7.0   \n",
       "1     1.013723     0.506733     0.384430     0.700082       8.0       7.0   \n",
       "2     0.000000     0.000000     0.000000     0.000000       7.0       7.0   \n",
       "3    -1.556609    -0.031928     0.548465     0.640155       7.0       7.0   \n",
       "4     1.227917     0.849516    -0.301537    -0.303690       8.0       4.0   \n",
       "\n",
       "    Temp9am   Temp3pm  RainToday  \n",
       "0  1.412848  0.198404        1.0  \n",
       "1 -0.335927 -0.606132        1.0  \n",
       "2  0.000000  0.000000        0.0  \n",
       "3 -0.029125 -0.304431        0.0  \n",
       "4 -0.520009 -0.390632        0.0  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Xtest.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0\n",
       "0  0\n",
       "1  0\n",
       "2  0\n",
       "3  1\n",
       "4  0"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Ytrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "from time import time #随时监控我们的模型的运行时间\n",
    "import datetime\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.metrics import roc_auc_score, recall_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "Ytrain = Ytrain.iloc[:,0].ravel()\n",
    "Ytest = Ytest.iloc[:,0].ravel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "linear 's testing accuracy 0.844000, recall is 0.469388', auc is 0.869029\n",
      "00:06:621084\n",
      "poly 's testing accuracy 0.840667, recall is 0.457726', auc is 0.868157\n",
      "00:07:322248\n",
      "rbf 's testing accuracy 0.813333, recall is 0.306122', auc is 0.814873\n",
      "00:09:621109\n",
      "sigmoid 's testing accuracy 0.655333, recall is 0.154519', auc is 0.437308\n",
      "00:10:463819\n"
     ]
    }
   ],
   "source": [
    "#建模选择自然是我们的支持向量机SVC，首先用核函数的学习曲线来选择核函数\n",
    "#我们希望同时观察，精确性，recall以及AUC分数\n",
    "times = time() #因为SVM是计算量很大的模型，所以我们需要时刻监控我们的模型运行时间\n",
    "\n",
    "for kernel in [\"linear\",\"poly\",\"rbf\",\"sigmoid\"]:\n",
    "    clf = SVC(kernel = kernel\n",
    "              ,gamma=\"auto\"\n",
    "              ,degree = 1\n",
    "              ,cache_size = 5000\n",
    "             ).fit(Xtrain, Ytrain)\n",
    "    result = clf.predict(Xtest)\n",
    "    score = clf.score(Xtest,Ytest)\n",
    "    recall = recall_score(Ytest, result)\n",
    "    auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n",
    "    print(\"%s 's testing accuracy %f, recall is %f', auc is %f\" % (kernel,score,recall,auc))\n",
    "    print(datetime.datetime.fromtimestamp(time()-times).strftime(\"%M:%S:%f\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "linear 's testing accuracy 0.796667, recall is 0.775510', auc is 0.870062\n",
      "00:07:740303\n",
      "poly 's testing accuracy 0.793333, recall is 0.763848', auc is 0.871448\n",
      "00:09:007915\n",
      "rbf 's testing accuracy 0.803333, recall is 0.600583', auc is 0.819713\n",
      "00:11:986949\n",
      "sigmoid 's testing accuracy 0.562000, recall is 0.282799', auc is 0.437119\n",
      "00:14:324700\n"
     ]
    }
   ],
   "source": [
    "times = time()\n",
    "for kernel in [\"linear\",\"poly\",\"rbf\",\"sigmoid\"]:\n",
    "    clf = SVC(kernel = kernel\n",
    "              ,gamma=\"auto\"\n",
    "              ,degree = 1\n",
    "              ,cache_size = 5000\n",
    "              ,class_weight = \"balanced\"\n",
    "             ).fit(Xtrain, Ytrain)\n",
    "    result = clf.predict(Xtest)\n",
    "    score = clf.score(Xtest,Ytest)\n",
    "    recall = recall_score(Ytest, result)\n",
    "    auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n",
    "    print(\"%s 's testing accuracy %f, recall is %f', auc is %f\" % (kernel,score,recall,auc))\n",
    "    print(datetime.datetime.fromtimestamp(time()-times).strftime(\"%M:%S:%f\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing accuracy 0.548000, recall is 0.970845', auc is 0.867172\n",
      "00:12:731999\n"
     ]
    }
   ],
   "source": [
    "times = time()\n",
    "clf = SVC(kernel = \"linear\"\n",
    "          ,gamma=\"auto\"\n",
    "          ,cache_size = 5000\n",
    "          ,class_weight = {1:15} #注意，这里写的其实是，类别1：10，隐藏了类别0：1这个比例\n",
    "         ).fit(Xtrain, Ytrain)\n",
    "result = clf.predict(Xtest)\n",
    "score = clf.score(Xtest,Ytest)\n",
    "recall = recall_score(Ytest, result)\n",
    "auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n",
    "print(\"testing accuracy %f, recall is %f', auc is %f\" %(score,recall,auc))\n",
    "print(datetime.datetime.fromtimestamp(time()-times).strftime(\"%M:%S:%f\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "valuec = pd.Series(Ytest).value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1157\n",
       "1     343\n",
       "dtype: int64"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "valuec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7713333333333333"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "valuec[0]/valuec.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [],
   "source": [
    "#查看模型的特异度\n",
    "from sklearn.metrics import confusion_matrix as CM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [],
   "source": [
    "clf = SVC(kernel = \"linear\"\n",
    "          ,gamma=\"auto\"\n",
    "          ,cache_size = 5000\n",
    "         ).fit(Xtrain, Ytrain)\n",
    "result = clf.predict(Xtest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [],
   "source": [
    "cm = CM(Ytest,result,labels=(1,0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 161,  182],\n",
       "       [  52, 1105]], dtype=int64)"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "specificity = cm[1,1]/cm[1,:].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9550561797752809"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "specificity #几乎所有的0都被判断正确了，还有不少1也被判断正确了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "irange = np.linspace(0.01,0.05,10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.01      , 0.01444444, 0.01888889, 0.02333333, 0.02777778,\n",
       "       0.03222222, 0.03666667, 0.04111111, 0.04555556, 0.05      ])"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "irange"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "under ratio 1:1.010000 testing accuracy 0.844667, recall is 0.475219', auc is 0.869157\n",
      "00:06:717088\n",
      "under ratio 1:1.014444 testing accuracy 0.844667, recall is 0.478134', auc is 0.869185\n",
      "00:06:542548\n",
      "under ratio 1:1.018889 testing accuracy 0.844667, recall is 0.478134', auc is 0.869198\n",
      "00:05:809458\n",
      "under ratio 1:1.023333 testing accuracy 0.845333, recall is 0.481050', auc is 0.869175\n",
      "00:07:305287\n",
      "under ratio 1:1.027778 testing accuracy 0.844000, recall is 0.481050', auc is 0.869394\n",
      "00:06:395937\n",
      "under ratio 1:1.032222 testing accuracy 0.844000, recall is 0.481050', auc is 0.869528\n",
      "00:06:333034\n",
      "under ratio 1:1.036667 testing accuracy 0.844000, recall is 0.481050', auc is 0.869659\n",
      "00:06:923525\n",
      "under ratio 1:1.041111 testing accuracy 0.844667, recall is 0.483965', auc is 0.869629\n",
      "00:08:993914\n",
      "under ratio 1:1.045556 testing accuracy 0.844667, recall is 0.483965', auc is 0.869712\n",
      "00:06:514583\n",
      "under ratio 1:1.050000 testing accuracy 0.845333, recall is 0.486880', auc is 0.869863\n",
      "00:06:779875\n"
     ]
    }
   ],
   "source": [
    "for i in irange:\n",
    "    times = time()\n",
    "    clf = SVC(kernel = \"linear\"\n",
    "              ,gamma=\"auto\"\n",
    "              ,cache_size = 5000\n",
    "              ,class_weight = {1:1+i}\n",
    "             ).fit(Xtrain, Ytrain)\n",
    "    result = clf.predict(Xtest)\n",
    "    score = clf.score(Xtest,Ytest)\n",
    "    recall = recall_score(Ytest, result)\n",
    "    auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n",
    "    print(\"under ratio 1:%f testing accuracy %f, recall is %f', auc is %f\" %(1+i,score,recall,auc))\n",
    "    print(datetime.datetime.fromtimestamp(time()-times).strftime(\"%M:%S:%f\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [],
   "source": [
    "irange_ = np.linspace(0.018889,0.027778,10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "under ratio 1:1.018889 testing accuracy 0.844667, recall is 0.478134', auc is 0.869213\n",
      "00:07:301489\n",
      "under ratio 1:1.019877 testing accuracy 0.844000, recall is 0.478134', auc is 0.869228\n",
      "00:06:855660\n",
      "under ratio 1:1.020864 testing accuracy 0.844000, recall is 0.478134', auc is 0.869218\n",
      "00:06:545535\n",
      "under ratio 1:1.021852 testing accuracy 0.844667, recall is 0.478134', auc is 0.869188\n",
      "00:06:298092\n",
      "under ratio 1:1.022840 testing accuracy 0.844667, recall is 0.478134', auc is 0.869220\n",
      "00:05:309544\n",
      "under ratio 1:1.023827 testing accuracy 0.844667, recall is 0.481050', auc is 0.869188\n",
      "00:04:701487\n",
      "under ratio 1:1.024815 testing accuracy 0.844667, recall is 0.481050', auc is 0.869231\n",
      "00:04:710405\n",
      "under ratio 1:1.025803 testing accuracy 0.844000, recall is 0.481050', auc is 0.869253\n",
      "00:05:062793\n",
      "under ratio 1:1.026790 testing accuracy 0.844000, recall is 0.481050', auc is 0.869314\n",
      "00:05:193115\n",
      "under ratio 1:1.027778 testing accuracy 0.844667, recall is 0.481050', auc is 0.869374\n",
      "00:05:308805\n"
     ]
    }
   ],
   "source": [
    "for i in irange_:\n",
    "    times = time()\n",
    "    clf = SVC(kernel = \"linear\"\n",
    "              ,gamma=\"auto\"\n",
    "              ,cache_size = 5000\n",
    "              ,class_weight = {1:1+i}\n",
    "             ).fit(Xtrain, Ytrain)\n",
    "    result = clf.predict(Xtest)\n",
    "    score = clf.score(Xtest,Ytest)\n",
    "    recall = recall_score(Ytest, result)\n",
    "    auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n",
    "    print(\"under ratio 1:%f testing accuracy %f, recall is %f', auc is %f\" %(1+i,score,recall,auc))\n",
    "    print(datetime.datetime.fromtimestamp(time()-times).strftime(\"%M:%S:%f\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression as LR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8486666666666667"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "logclf = LR(solver=\"liblinear\").fit(Xtrain, Ytrain)\n",
    "logclf.score(Xtest,Ytest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [],
   "source": [
    "C_range = np.linspace(5,10,10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5.0 0.8493333333333334\n",
      "5.555555555555555 0.8493333333333334\n",
      "6.111111111111111 0.8486666666666667\n",
      "6.666666666666667 0.8493333333333334\n",
      "7.222222222222222 0.8493333333333334\n",
      "7.777777777777778 0.8493333333333334\n",
      "8.333333333333334 0.8493333333333334\n",
      "8.88888888888889 0.8493333333333334\n",
      "9.444444444444445 0.8493333333333334\n",
      "10.0 0.8493333333333334\n"
     ]
    }
   ],
   "source": [
    "for C in C_range:\n",
    "    logclf = LR(solver=\"liblinear\",C=C).fit(Xtrain, Ytrain)\n",
    "    print(C,logclf.score(Xtest,Ytest))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing accuracy 0.795333,recall is 0.772595', auc is 0.870165\n",
      "00:19:125873\n"
     ]
    }
   ],
   "source": [
    "times = time()\n",
    "clf = SVC(kernel = \"linear\",C=3.1663157894736838,cache_size = 5000\n",
    "          ,class_weight = \"balanced\"\n",
    "         ).fit(Xtrain, Ytrain)\n",
    "result = clf.predict(Xtest)\n",
    "score = clf.score(Xtest,Ytest)\n",
    "recall = recall_score(Ytest, result)\n",
    "auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n",
    "print(\"testing accuracy %f,recall is %f', auc is %f\" % (score,recall,auc))\n",
    "print(datetime.datetime.fromtimestamp(time()-times).strftime(\"%M:%S:%f\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import roc_curve as ROC\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [],
   "source": [
    "FPR, Recall, thresholds = ROC(Ytest,clf.decision_function(Xtest),pos_label=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [],
   "source": [
    "area = roc_auc_score(Ytest,clf.decision_function(Xtest))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8701653769298805"
      ]
     },
     "execution_count": 157,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "area"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAIABJREFUeJzt3XmcT/X+wPHXu5lCJdnqJsvYzRBTJnvWbJVoJ1fxG4RUSIt2kmuNZMQkKYpKudR1L6nkJtKUSZYw2ZeyxCBLY7x/f5wzc7/GLN9hvtvM+/l4ePiecz7fc97nzPf7fZ/P53PO54iqYowxxgBcFOgAjDHGBA9LCsYYY9JZUjDGGJPOkoIxxph0lhSMMcaks6RgjDEmnSWFfEBEuorI4kDHEWgiUl5EjolImB+3GSEiKiLh/tqmL4nIOhFpfh7vy7efQRFpLiK7Ah2Hv1hSyGMisk1ETrg/Tr+JyAwRudyX21TV91S1jS+3EYzcY31z2rSq7lDVy1U1NZBxBYqbnKpcyDpUtaaqLs1hO+ckwoL6GcyPLCn4RgdVvRyIBq4HhgQ4nvMSyLPf/HLmnRt2vE0wsKTgQ6r6G7AIJzkAICKFRGSsiOwQkd9FZIqIFPFY3lFEEkXkiIj8KiLt3PnFROQtEdkrIrtFZHhaM4mIdBeRb9zXU0RkrGccIjJfRAa5r8uIyMcisl9EtorIox7lXhKRuSIyS0SOAN0z7pMbx7vu+7eLyHMicpFHHMtF5HURSRaRX0SkVYb3ZrcPy0VkvIj8AbwkIpVF5EsROSgiB0TkPRG50i0/EygPfOrWyp7MeAYrIktF5GV3vUdFZLGIlPKI5wF3Hw6KyPMZax4Z9ruIiIxzyyeLyDeefzegq/s3PSAiz3q8r56IrBCRw+5+TxKRSzyWq4g8LCKbgc3uvNdEZKf7GfhBRG7yKB8mIs+4n42j7vJyIrLMLfKTezzuc8vf5n6eDovItyJS22Nd20TkKRFZA/wpIuGex8CNPcGN43cRedV9a9q2Drvbauj5GXTfW1NEPheRP9z3PpPFcc3y++DGttLj79lXnOatwu70R+LUxpNFZJmI1PRY7wwRmSwi/3ZjXC4ifxORCSJyyP1sXp/hWAwRkfXu8rfTtpNJzFl+h/IFVbV/efgP2Abc7L4uC/wMvOaxfAKwACgBFAU+Bf7hLqsHJAOtcRL2tUANd9k/ganAZcBVwCrgIXdZd+Ab93VTYCcg7nRx4ARQxl3nD8ALwCVAJWAL0NYt+xKQAnRyyxbJZP/eBea7sUcAm4BYjzhOAwOBi4H73P0p4eU+nAYeAcKBIkAV91gUAkrj/BhNyOxYu9MRgALh7vRS4Fegmru+pcBId1kUcAxo4h6Lse6+35zF3zXOff+1QBjQyI0rbZtvutuoA5wCIt331QUauPsUAWwABnisV4HPcT4PRdx5fwdKuu95HPgNKOwuewLnM1UdEHd7JT3WVcVj3TcA+4D6bswPuseskMfxSwTKeWw7/ZgCK4Bu7uvLgQaZHedMPoNFgb1u7IXd6fpZHNfsvg8XuX/zl4CqwCHgeo/3/p/7nkLuehI9ls0ADrjHvzDwJbAVeMA9FsOBrzJ8lta6x6IEsBwY7i5rDuzyiCnL71B++BfwAPLbP/fDdQw46n5xvgCudJcJ8CdQ2aN8Q2Cr+3oqMD6TdV6N80NTxGNel7QPdYYvpAA7gKbudC/gS/d1fWBHhnUPAd52X78ELMtm38LcOKI85j0ELPWIYw9uQnLnrQK6ebkPO7LatlumE7A6w7HOKSk857G8H/Af9/ULwGyPZZcCf5FJUnB/CE4AdTJZlrbNshn2uXMW+zAAmOcxrUDLHPb7UNq2gY1AxyzKZUwKbwAvZyizEWjmcfz+L5PPb1pSWAYMBUplsc9ZJYUunn+nbPYr2++Dx7b+wEmmQ7JZ15VuTMXc6RnAmx7LHwE2eExfBxzO
sN99PKZvAX51Xzfnf0kh2+9Qfvhn7Yi+0UlVl4hIM+B9oBRwGOds91LgBxFJKys4P7bgnKUszGR9FXDOvPd6vO8inBrBWVRVRWQOzhdzGXA/MMtjPWVE5LDHW8KA/3pMn7NOD6Vwzo62e8zbjnP2nGa3ut8Uj+VlvNyHs7YtIlcBE4GbcM4IL8L5gcyN3zxeH8c548WNKX17qnpcRA5msY5SOGebv+Z2OyJSDXgViMH524fjnGl6yrjfjwM93RgVuMKNAZzPSHZxeKoAPCgij3jMu8Rdb6bbziAWGAb8IiJbgaGq+pkX2/U2xpy+D6jqNhH5CudHOi69kNPs+Apwj7ueM+6iUji1U4DfPbZ1IpPpjBeAeB6LtM9tRt58h0Ka9Sn4kKp+jXPGktbGfwDnw1hTVa90/xVTp1ManA9l5UxWtRPnLLuUx/uuUNWamZQFmA3cLSIVcM5sPvZYz1aPdVypqkVV9RbPsLPZpQM4TSwVPOaVB3Z7TF8rHt9wd/keL/ch47b/4c6rrapX4DSrSDblc2MvTvMe4PQZ4DTZZOYAcJLM/zY5eQP4Bajq7sMznL0P4LEfbv/BU8C9QHFVvRLnRy7tPVl9RjKzE3glw9/7UlWdndm2M1LVzaraBaepbxQwV0Quy+49uYwxp+8DInILTu3hC2CMx3vvBzoCNwPFcGoUcO6xzY1yHq/TPrcZefMdCmmWFHxvAtBaRKJV9QxO2/N49ywYEblWRNq6Zd8CeohIKxG5yF1WQ1X3AouBcSJyhbusslsTOYeqrgb2A9OARaqadlazCjjiduAVcTsta4nIjd7siDqXen4IvCIiRd2kM4j/1UTA+QF5VEQuFpF7gEhgYW73wVUUpynusIhci9Oe7ul3nDbd8zEX6CAijcTp+B1KFj8o7t9tOvCq28kY5nauFvJiO0WBI8AxEakB9PWi/Gmcv1+4iLyAU1NIMw14WUSqiqO2iKQls4zH402gj4jUd8teJiK3ikhRL+JGRP4uIqXd/U/7DKW6sZ0h62P/GfA3ERngdiQXFZH6GQvl9H0Q56KAt3BqTQ/i/L3SfnyL4pxkHMSpbYzwZp9y8LCIlBWREjjJ+4NMylzQdygUWFLwMVXdj9M5+7w76ykgCVgpzhU+S3A6DVHVVUAPYDzO2eHX/O+s/AGcqv96nCaUucA12Wx6Ns5Z1PsesaQCHXCuhtqKc6Y2DedMy1uP4LQDbwG+cdc/3WP5dzidggdwqvd3q2pas0xu92EoTmdpMvAv4JMMy/8BPCfOlTWDc7EPqOo6d1/m4NQajuJ0yp7K4i2DcTp4v8dp4x6Fd9+fwThntUdxfgAz+6HxtAj4N04H/nacGopns8arOIl5MU6yeQungxucPqF33ONxr6om4PQpTcI53klkckVZNtoB60TkGPAaTj/JSVU9jvO3Xe5uq4Hnm1T1KM4FAh1wmtU2Ay2y2EaW3wcgHpivqgvdz1AsMM1Ngu+6x2c3zudpZS72Kyvv4xzXLe6/4RkL5NF3KKilXaFizAUTke5AT1VtEuhYckucGwwP4zTzbA10PMa/RGQbzmd3SaBjCTSrKZgCS0Q6iMilbjv5WJyawLbARmVMYFlSMAVZR5zOxD04TV6d1arOpoCz5iNjjDHprKZgjDEmXcjdvFaqVCmNiIgIdBjGGBNSfvjhhwOqWjqnciGXFCIiIkhISAh0GMYYE1JEZHvOpaz5yBhjjAdLCsYYY9JZUjDGGJPOkoIxxph0lhSMMcak81lSEJHpIrJPRNZmsVxEZKKIJInIGhG5wVexGGOM8Y4vawozcEZZzEp7nKEFqgK9ccadN8YYE0A+u09BVZeJSEQ2RToC77pjzawUkStF5Bp33H1jjMkb8fHw/vs5lwtif6amsj8lhYgGDWDCBJ9uK5A3r13L2ePE73LnnZMURKQ3Tm2C8uXL+yU4Y0wA5eUP+ddfO/83y+55TsHry0OH6LVpE8XCw0moX9/nHcGBTAqZPeUq09H5
VDUe54EbxMTE2Ah+xuRHnokgL3/ImzWD+++H3r0vfF1+dPjwYZ544gmmTZtGlSpVGD9tGhf5IbEFMins4uxnopYl82eiGmOCla/O6EP0hzyvpKam0qhRIzZu3MiTTz7JSy+9RJEiRXJ+Yx4IZFJYAPQXkTk4D5dPtv4EY/KAP9vQ7Yw+Tx08eJASJUoQFhbGK6+8Qrly5YiJifFrDD5LCiIyG2gOlBKRXcCLwMUAqjoFWAjcgvN81uM4zyY2xmTF2x97f7ah2w95nlBV3nvvPR577DFGjhxJr169uOOOOwISiy+vPuqSw3IFHvbV9o0JWVn9+Hv7Y28/1CFl586d9OnTh4ULF9KgQQMaN24c0HhCbuhsY/IlbzpZ7cc+35k9ezYPPfQQqampTJgwgf79+xMWFhbQmCwpGBMIGWsD1slaIBUvXpz69esTHx9PxYoVAx0OEILPaI6JiVF7yI4JCdn1AWRWG7BEkO+dPn2a8ePH89dff/Hss88CTn+CSGZX6OctEflBVXPstbaagjF5LS0ZZNcHYLWBAuenn34iNjaWH374gXvvvTc9GfgjIeSGJQVjzpc3HcL2w1/gnTp1iuHDhzNy5EhKlCjBRx99xF133RV0ySCNJQVjzkd8PDz0kPPaOoRNNjZv3syoUaO4//77efXVVylZsmSgQ8qWJQVjsuJNn8DUqfbjb85x7Ngx5s+fT9euXalVqxa//PILlSpVCnRYXrGH7BiTUXw8NG/u1ATSfvwzatbMEoLJ1Oeff851111Ht27d2LBhA0DIJASwmoIx2V8eas1AxkuHDh1i8ODBTJ8+nWrVqvH1118TGRkZ6LByzZKCKVgyaxLKeJWQJQOTS6mpqTRu3JhNmzYxZMgQXnjhBQoXLhzosM6LJQVTsLz/PiQmQnT0/+ZZEjDn6cCBA+kD2I0YMYLy5ctzww2h/WRhSwomf8lp0Li0hLB0qd9CMvmPqjJz5kwGDBjAyJEj6d27N506dQp0WHnCkoIJfbl5OEt0tFMrMOY8bd++nYceeohFixbRqFEjmjZtGuiQ8pQlBRN6bNwgEyCzZs2ib9++qCqvv/46/fr146KL8tdFnJYUTPDLLgmk/W+JwPhB6dKlady4MVOnTqVChQqBDscnbEA8E/yaNz+3c9iSgPGDlJQUxo0bR0pKCs8//zzgvwHs8poNiGfyh/h4p2bQrJl1Dhu/Wr16NbGxsaxevZrOnTsH7QB2ec2SggkuWTUVWeew8ZOTJ08ybNgwRo8eTalSpfj444+58847Ax2W3+SvHhIT2tIGmfMcWsKGkzB+lpSUxNixY3nggQfYsGFDgUoIYDUFEwwyPn/AkoDxs2PHjjFv3jy6detGrVq12LhxY9A8Cc3fLCkY//D2KWTWgWz8bNGiRfTu3ZudO3cSExNDZGRkgU0IYEnB+JK3N5VZMjABcPDgQQYNGsS7775LjRo1+O9//xuSA9jlNUsKxnc8xxmyH34TRNIGsEtKSuLZZ5/lueeeC9kB7PKaJQWTtzxrBzbOkAky+/fvp2TJkoSFhTFq1CgqVKhAtOf9L8auPjJ5IO2hNBkfTGPjDJkgoaq8/fbbVKtWjTfffBOAjh07WkLIhNUUzPnLeNWQjT1kgtC2bdvo3bs3n3/+OTfddBMtWrQIdEhBzZKCyZ2sOo8tEZggNHPmTPr27YuIMHnyZB566KF8N4BdXrOkYLyXdnMZWK3AhISrr76apk2bMmXKFMqXLx/ocEKCJQXjHc+EYDeXmSCVkpLC6NGjSU1N5YUXXqBNmza0adMm0GGFFKtHmeyldSJbQjBB7scff+TGG2/kueeeY+PGjYTaCNDBwmoKJnOZdSJbU5EJQidOnGDo0KGMHTuW0qVLM2/evHzzaMxA8GlSEJF2wGtAGDBNVUdmWF4eeAe40i3ztKou9GVMxoMNPWHygS1btvDqq6/SvXt3xowZQ/HixQMdUkjzWVIQkTAgDmgN7AK+F5EFqrreo9hzwIeq
+oaIRAELgQhfxWSwoSdMvnDkyBE++eQTunfvTs2aNdm8eXO+fRKav/myplAPSFLVLQAiMgfoCHgmBQWucF8XA/b4MB5jVw+ZfGDhwoX06dOH3bt3U79+fSIjIy0h5CFfJoVrgZ0e07uA+hnKvAQsFpFHgMuAmzNbkYj0BnoDdlnZ+bChqU0+cODAAQYOHMisWbOIiopi+fLlNoCdD/jy6qPMnlmX8XKALsAMVS0L3ALMFJFzYlLVeFWNUdWY0qVL+yDUfC5tYDp7YI0JUWkD2M2ZM4cXXniBH3/8kQYNGgQ6rHzJlzWFXUA5j+mynNs8FAu0A1DVFSJSGCgF7PNhXAVHWg3BBqYzIer333+ndOnShIWFMXbsWCpUqEDt2rUDHVa+5suawvdAVRGpKCKXAJ2BBRnK7ABaAYhIJFAY2O/DmAoWz4RgA9OZEKKqvPXWW1SvXp34+HgAOnToYAnBD3xWU1DV0yLSH1iEc7npdFVdJyLDgARVXQA8DrwpIgNxmpa6q91xcuGshmBC2JYtW+jVqxdffvklzZo14+abM+1qND7i0/sU3HsOFmaY94LH6/VAY1/GUOBkvMLIaggmhLzzzjv069ePsLAwpkyZQq9evWwAOz+zO5rzm7R7EKxD2YSgMmXK0LJlS9544w3Kli0b6HAKJEsK+Ul8vHPZabNmlhBMSPjrr78YOXIkZ86c4aWXXqJ169a0bt060GEVaFYvy0/SagnWZGRCwPfff0/dunV58cUX2bJliw1gFyQsKeQHaSOZpt2LYLUEE8SOHz/O4MGDadCgAYcOHWLBggW8++67iGR2a5PxN2s+CnXWsWxCzNatW3n99dfp1asXo0aNolixYoEOyXiwpBDqrGPZhIDk5GQ++eQTevToQc2aNUlKSqJcuXI5v9H4nTUfhSprMjIh4l//+hc1a9akZ8+e/PLLLwCWEIKYJYVQlNZk9PXXdreyCVr79++na9eu3HbbbRQvXpwVK1ZQo0aNQIdlcmDNR6HERjs1ISI1NZUmTZqwdetWhg4dytNPP80ll1wS6LCMFywphAJ7NKYJEb/99htXXXUVYWFhjBs3joiICGrVqhXosEwuWPNRsPNsKkob+nrpUksIJqicOXOGqVOnUq1aNaZOnQrAbbfdZgkhBFlNIRhl9shMayoyQSopKYlevXqxdOlSWrZsSdu2bQMdkrkAVlMIRmkjnII9GMcEtbfffpvrrruOH3/8kTfffJMlS5ZQqVKlQIdlLoDVFIKVDXltQkD58uVp27YtcXFxXHvttYEOx+QBSwrBxnNQO2OCzKlTp/jHP/7BmTNnGDZsGK1ataJVq1aBDsvkIWs+ChZpN6OlDVlh9x6YIPPdd99Rt25dhg4dyo4dO2wAu3zKkkKwSOtHsD4EE2T+/PNPBg0aRMOGDUlOTuazzz5jxowZNoBdPmXNR8HAs8nI+hFMkNm+fTuTJ0+mT58+jBw5kiuuuCLQIRkfsqQQaJ6jnFqTkQkShw8fZu7cufTs2ZOoqCiSkpLsSWgFhDUfBZqNcmqCzPz584mKiqJPnz7pA9hZQig4LCkEAxvl1ASBffv20blzZzp16kTp0qVZuXKlDWBXAFnzkTGG1NRUGjduzI4dOxg+fDhPPvkkF198caDDMgFgSSGQ7J4EE2B79uzhb3/7G2FhYbz22mtEREQQFRUV6LBMAFnzUSCl9SdYB7PxszNnzvDGG29Qo0YNpkyZAsAtt9xiCcFYUggYz1qC9ScYP9q0aRMtWrSgX79+1K9fn/bt2wc6JBNErPnInzIb/dRqCcaP3nrrLfr370/hwoWZPn063bt3t5vQzFmspuBPNvqpCbCIiAjat2/P+vXr6dGjhyUEcw6rKfhDWg0hMdFGPzV+derUKV5++WUAhg8fbgPYmRxZTcHXPJ+cFh1tzUXGb7799luio6N55ZVX2Lt3rw1gZ7xiScGXPIewsMdoGj85duwYjz32GE2aNOH48eP85z//4a233rKm
IuMVnyYFEWknIhtFJElEns6izL0isl5E1onI+76Mx+9sCAsTADt27GDq1Kk8/PDDrF271h6PaXLFZ30KIhIGxAGtgV3A9yKyQFXXe5SpCgwBGqvqIRG5ylfxBIxdcmr84NChQ3z00Uf07t2bqKgotmzZQpkyZQIdlglBvqwp1AOSVHWLqv4FzAE6ZijTC4hT1UMAqrrPh/EYky/NmzePqKgo+vXrx8aNGwEsIZjz5sukcC2w02N6lzvPUzWgmogsF5GVItIusxWJSG8RSRCRhP379/so3DyWdnOaMT7y22+/cc8993DnnXfyt7/9jVWrVlG9evVAh2VCnC8vSc2sVyvj5Q/hQFWgOVAW+K+I1FLVw2e9STUeiAeIiYkJjUsobAgL40OpqancdNNN7Ny5kxEjRjB48GAbwM7kCV8mhV1AOY/pssCeTMqsVNUUYKuIbMRJEt/7MC7fsyEsjI/s2rWLMmXKEBYWxsSJE6lYsaINb23ylC+bj74HqopIRRG5BOgMLMhQ5p9ACwARKYXTnLTFhzH5h9USTB47c+YMr7/+OjVq1OCNN94AoH379pYQTJ7zWVJQ1dNAf2ARsAH4UFXXicgwEbndLbYIOCgi64GvgCdU9aCvYvILqyWYPPbLL7/QtGlTHn30UZo0acJtt90W6JBMPubTYS5UdSGwMMO8FzxeKzDI/Rf67HnLJo9NmzaN/v37c+mll/LOO+/QrVs3uwnN+JSNfZSX7GY1k8cqV65Mhw4dmDRpEldffXWgwzEFgITaeCgxMTGakJAQ6DDOZgPemTxy8uRJhg0bBsCIESMCHI3JT0TkB1WNyalctjUFEcm2WUdVX81tYPlKWjJIux+hWTNrNjLnbfny5cTGxrJx40Z69uyJqlpTkfG7nJqPivolilCVVjtISwbWZGTOw9GjR3nmmWeIi4ujQoUKLFq0iDZt2gQ6LFNAZZsUVHWovwIJOZ5XGVlzkbkAu3btYtq0aTzyyCO88sorXH755YEOyRRgOTUfTcxuuao+mrfhhBC7F8FcgIMHD/Lhhx/St29fIiMj2bJlC9dcc02gwzImx+ajH/wSRaixexHMeVJVPv74Yx5++GH++OMPWrZsSfXq1S0hmKCRU/PRO/4KJKRYLcGch7179/Lwww8zb9486taty+LFi20AOxN0vLpPQURKA08BUUDhtPmq2tJHcQUvqyWY85A2gN3u3bsZPXo0AwcOJDzcbhMywcfbT+V7wAfArUAf4EEgRMawzkN2x7LJpZ07d3LttdcSFhZGXFwcFStWpFq1aoEOy5gseTv2UUlVfQtIUdWvVfX/gAY+jCs42R3LxkupqalMnDjxrAHs2rZtawnBBD1vawop7v97ReRWnCGwy/ompCBnzUYmBxs2bCA2NpYVK1bQvn17OnToEOiQjPGat0lhuIgUAx4HXgeuAAb6LCpjQlR8fDyPPPIIRYsWZebMmXTt2tXuSjYhxaukoKqfuS+TcZ9/YIw5V9WqVbnjjjuYOHEiV111VaDDMSbXvOpTEJF3RORKj+niIjLdd2EFIXvmssnEiRMneOqpp3j66acBaNGiBXPmzLGEYEKWtx3NtT2fm6yqh4DrfRNSkLJ7E0wGy5Yto06dOowePZrk5GRCbcRhYzLjbVK4SESKp02ISAkK4rMYrJPZAEeOHKFfv340a9aM1NRUvvjiC9544w3rOzD5grc/7OOAb0VkLqDAvcArPovKmCC2Z88eZsyYwaBBgxg2bBiXXXZZoEMyJs9429H8rogkAC0BAe5U1fU+jcyYIHLgwAE+/PBD+vXrR40aNdi6das9Cc3kS942HwGUAP5U1deB/SJS0UcxGRM0VJUPPviAqKgoBgwYwKZNmwAsIZh8y9urj17EGftoiDvrYmCWr4IyJhjs2bOHTp060blzZypUqMAPP/xgdySbfM/bPoU7cK42+hFAVfeIiD2VzeRbqampNG3alN27dzN27Fgee+wxG8DOFAjefsr/UlUV
EQUQEetZM/nS9u3bKVu2LGFhYUyePJlKlSpRpUqVQIdljN9426fwoYhMBa4UkV7AEmCa78IKMnbjWr6XmprKq6++SmRkZPoAdm3atLGEYAocb68+GisirYEjQHXgBVX93KeRBRO7cS1fW7t2LbGxsaxatYrbbruNTp06BTokYwLG60ZSNwl8DiAiYSLSVVXf81lkwcZuXMuXpkyZwqOPPkqxYsV4//336dy5s92EZgq0bJuPROQKERkiIpNEpI04+gNbcG5gMyYkpQ1JERkZyT333MP69evp0qWLJQRT4OVUU5gJHAJWAD2BJ4BLgI6qmujj2IzJc8ePH+eFF14gLCyMUaNG0axZM5o1axbosIwJGjl1NFdS1e6qOhXoAsQAtxWohGCdzPnG0qVLqV27NuPGjePYsWM2gJ0xmcgpKaQ9cQ1VTQW2qupR34YUZKyTOeQlJyfz0EMP0aKF8yiQL7/8kri4OGsqMiYTOTUf1RGRI+5rAYq40wKoql7h0+iChXUyh7S9e/cya9YsBg8ezNChQ7n00ksDHZIxQSvbmoKqhqnqFe6/oqoa7vE6x4QgIu1EZKOIJInI09mUu1tEVERizmcnjMlo//79vP766wDUqFGDbdu2MWbMGEsIxuQgNwPi5YqIhAFxQHsgCugiIlGZlCsKPAp856tYzpv1J4QcVeX9998nMjKSxx9/PH0Au9KlSwc4MmNCg8+SAlAPSFLVLar6FzAH6JhJuZeB0cBJH8aSe/Hx8NBDzmvrTwgJO3fupEOHDnTt2pUqVaqwevVqG8DOmFzyZVK4FtjpMb3LnZdORK4HyqnqZ9mtSER6i0iCiCTs378/7yPNyDMhTJ1q/Qkh4PTp0zRv3pyvvvqK8ePHs3z5cmrWrBnosIwJOb4c9jGzSzvSrwEUkYuA8UD3nFakqvFAPEBMTIzvryNMu+LIEkLQ27ZtG+XKlSM8PJypU6dSqVIlKlWqFOiwjAlZvqwp7ALKeUyXBfZ4TBcFagFLRWQb0ABYEPDO5rR+BLviKKidPn2asWPHEhkZyeTJkwG4+eabLSEYc4F8WVP4HqjqPqFtN9AZSG+cV9VkoFTatIgsBQaraoIPY8qZ3ZcQ9NasWUNsbCwJCQl07NiRu+66K9AuEEM3AAAbB0lEQVQhGZNv+KymoKqngf7AImAD8KGqrhORYSJyu6+2myeslhC0Jk+eTN26ddm+fTsffPAB8+bNo0yZMoEOy5h8w6ePklLVhcDCDPNeyKJsc1/GkqP4eKeWkJgI0dEBDcWcS1UREWrVqkXnzp0ZP348pUqVyvmNxphcsecLwtlXGzVrZk1HQeTPP//kueeeIzw8nDFjxtC0aVOaNm0a6LCMybd82dEcOjyvNlq61JqOgsQXX3zBddddx4QJEzh16pQNYGeMH1hSSGP9CEHj8OHD9OzZk5tvvpnw8HCWLVvGxIkTbQA7Y/zAkoIJOr///jtz5szhqaee4qeffuKmm24KdEjGFBjWp2CCQloieOyxx6hevTrbtm2zjmRjAsBqCiagVJVZs2YRFRXFk08+yebNmwEsIRgTIJYUbCTUgNmxYwe33nor3bp1o3r16iQmJlK1atVAh2VMgWbNR3YHc0CkDWC3b98+Jk6cSL9+/QgLCwt0WMYUeJYUwK488qMtW7ZQoUIFwsPDefPNN6lcuTIRERGBDssY47LmI+MXp0+fZtSoUURFRREXFwdAq1atLCEYE2QKdlKw/gS/SExMpH79+jz99NPccsst3HPPPYEOyRiThYKdFKw/wecmTZrEjTfeyO7du5k7dy6ffPIJ11xzTaDDMsZkoWAnBbD+BB9JG5Kidu3adO3alfXr19sQ18aEAOtoNnnq2LFjPPvss1x88cWMHTvWBrAzJsQU3JqC9SfkucWLF1OrVi1ef/11UlJSbAA7Y0JQwU0K1p+QZw4dOkSPHj1o27YthQsXZtmyZbz22ms2gJ0xIajgJgWw/oQ8sm/fPubOncuQIUNI
TEykSZMmgQ7JGHOerE/BnJfffvuN2bNnM3DgwPQB7EqWLBnosIwxF6hg1xRMrqkq77zzDlFRUQwZMiR9ADtLCMbkD5YUjNe2bdtGu3bt6N69O1FRUTaAnTH5kDUfGa+cPn2aFi1acODAAeLi4ujTpw8XXWTnFMbkN5YUTLaSkpKoWLEi4eHhTJ8+nUqVKlGhQoVAh2WM8ZGCeapn9yjkKCUlhREjRlCzZs30AexatGhhCcGYfK5g1hTsHoVs/fjjj8TGxpKYmMg999zDfffdF+iQjDF+UjBrCmD3KGRh4sSJ1KtXj99++41PPvmEDz/8kKuvvjrQYRlj/KTgJgVzlrQhKa6//noeeOAB1q9fzx133BHgqIwx/lYwm49MuqNHjzJkyBAKFSrEuHHjuOmmm7jpppsCHZYxJkCsplCA/ec//6FWrVpMnjwZVbUB7IwxlhQKooMHD/Lggw/Svn17LrvsMpYvX86rr75qA9gZYwpgUrDLUTl48CDz5s3j+eefZ/Xq1TRs2DDQIRljgoRPk4KItBORjSKSJCJPZ7J8kIisF5E1IvKFiPj+IvgCejnq3r17GTt2LKpKtWrV2L59O8OGDaNQoUKBDs0YE0R8lhREJAyIA9oDUUAXEYnKUGw1EKOqtYG5wGhfxXOWAnQ5qqoyffp0IiMjef7550lKSgKgePHiAY7MGBOMfFlTqAckqeoWVf0LmAN09Cygql+p6nF3ciVQ1ofxFDhbt26lTZs2xMbGUqdOHX766ScbwM4Yky1fXpJ6LbDTY3oXUD+b8rHAvzNbICK9gd4A5cuXz6v48rXTp0/TsmVLDh48yBtvvEHv3r1tADtjTI58mRQyu5Ql02seReTvQAzQLLPlqhoPxAPExMTYdZPZ2Lx5M5UqVSI8PJy3336bypUrU65cuUCHZYwJEb48ddwFeP4alQX2ZCwkIjcDzwK3q+opH8aTr6WkpDB8+HBq1arFpEmTAGjevLklBGNMrvgyKXwPVBWRiiJyCdAZWOBZQESuB6biJIR9PozFkU8vR01ISCAmJobnn3+eO++8ky5dugQ6JGNMiPJZUlDV00B/YBGwAfhQVdeJyDARud0tNga4HPhIRBJFZEEWq8sb+fBy1Ndee4369etz4MAB5s+fz+zZs7nqqqsCHZYxJkT5dOwjVV0ILMww7wWP1zf7cvuZyieXo6oqIkJMTAyxsbGMHj2aK6+8MtBhGWNCnA2IF2KOHDnCU089ReHChRk/fjyNGzemcePGgQ7LGJNP2DWKIWThwoXUrFmT+Ph4wsPDbQA7Y0yes6QQAg4cOMDf//53br31VooVK8a3337LmDFjbAA7Y0yes6QQAg4dOsSnn37Kiy++yI8//kj9+tndA2iMMefP+hSC1O7du3nvvfd44oknqFq1Ktu3b7eOZGOMz1lNIcioKm+++SZRUVG89NJL/PrrrwCWEIwxfmFJIYj8+uuvtGrVit69e3PDDTewZs0aqlSpEuiwjDEFiDUfBYnTp0/TqlUr/vjjD6ZOnUrPnj1tADtjjN9ZUgiwjRs3UrlyZcLDw3nnnXeoXLkyZcvaCOLGmMCwU9EA+euvvxg6dCjXXXcdcXFxADRr1swSgjEmoKymEACrVq0iNjaWtWvXcv/999O1a9dAh2SMMYDVFPxuwoQJNGzYMP3eg/fee49SpUoFOixjjAEsKfhN2pAU9erVo1evXqxbt47bbrstwFEZY8zZrPnIx5KTk3nyyScpUqQIEyZMoFGjRjRq1CjQYRljTKaspuBDn376KVFRUUybNo1ChQrZAHbGmKBnScEH9u/fz/3338/tt99OyZIlWblyJaNGjbIB7IwxQc+Sgg8kJyezcOFChg4dSkJCAjfeeGOgQzLGGK9Yn0Ie2blzJ7NmzeLpp5+mSpUqbN++nWLFigU6LGOMyRWrKVygM2fOMGXKFGrWrMnw4cPTB7CzhGCMCUWWFC7A5s2badmyJX379qVevXr8/PPPNoCdMSak
WfPReTp9+jStW7fm8OHDvPXWW/To0cM6ko0xIc+SQi5t2LCBqlWrEh4ezsyZM6lcuTJlypQJdFgmCKWkpLBr1y5OnjwZ6FBMAVK4cGHKli3LxRdffF7vt6TgpVOnTjFixAhGjBjBmDFjGDBgADfddFOgwzJBbNeuXRQtWpSIiAirRRq/UFUOHjzIrl27qFix4nmtw5KCF1auXElsbCzr16+nW7dudOvWLdAhmRBw8uRJSwjGr0SEkiVLsn///vNeh3U052DcuHE0atSIo0ePsnDhQt59911KliwZ6LBMiLCEYPztQj9zlhSycObMGQAaNmxInz59WLt2Le3btw9wVMYY41sFJynEx8PXX+dY7PDhw8TGxvLYY48B0KhRIyZPnswVV1zh6wiNyXNhYWFER0dTq1YtOnTowOHDh9OXrVu3jpYtW1KtWjWqVq3Kyy+/fNb4XP/+97+JiYkhMjKSGjVqMHjw4EDsQrZWr15Nz549Ax1Gtv7xj39QpUoVqlevzqJFizIt88UXX3DDDTcQHR1NkyZNSEpKAmDgwIFER0cTHR1NtWrVuPLKKwFnKJ127dr5JmBVDal/devW1fPSrJkqqE6dmmWRefPm6TXXXKNhYWE6ZMgQPXPmzPltyxhVXb9+faBD0Msuuyz99QMPPKDDhw9XVdXjx49rpUqVdNGiRaqq+ueff2q7du100qRJqqr6888/a6VKlXTDhg2qqpqSkqJxcXF5GltKSsoFr+Puu+/WxMREv24zN9atW6e1a9fWkydP6pYtW7RSpUp6+vTpc8pVrVo1/fMSFxenDz744DllJk6cqD169Eif7t69u37zzTeZbjezzx6QoF78xhasjuZmzaB373Nm79u3j/79+/PRRx8RHR3NZ599xg033BCAAE2+NWAAJCbm7Tqjo2HCBK+LN2zYkDVr1gDw/vvv07hxY9q0aQPApZdeyqRJk2jevDkPP/wwo0eP5tlnn6VGjRoAhIeH069fv3PWeezYMR555BESEhIQEV588UXuuusuLr/8co4dOwbA3Llz+eyzz5gxYwbdu3enRIkSrF69mujoaObNm0diYmL6GXCVKlVYvnw5F110EX369GHHjh2A83Cqxo0bn7Xto0ePsmbNGurUqQM4TzQcMGAAJ06coEiRIrz99ttUr16dGTNm8K9//YuTJ0/y559/8uWXXzJmzBg+/PBDTp06xR133MHQoUMB6NSpEzt37uTkyZM89thj9M7k9yI35s+fT+fOnSlUqBAVK1akSpUqrFq1ioYNG55VTkQ4cuQI4Iydltll7rNnz06PMy3W995775zjcqEKVlLIwpEjR/j888955ZVXeOKJJ877+l5jglVqaipffPEFsbGxgNN0VLdu3bPKVK5cmWPHjnHkyBHWrl3L448/nuN6X375ZYoVK8bPP/8MwKFDh3J8z6ZNm1iyZAlhYWGcOXOGefPm0aNHD7777jsiIiK4+uqruf/++xk4cCBNmjRhx44dtG3blg0bNpy1noSEBGrVqpU+XaNGDZYtW0Z4eDhLlizhmWee4eOPPwZgxYoVrFmzhhIlSrB48WI2b97MqlWrUFVuv/12li1bRtOmTZk+fTolSpTgxIkT3Hjjjdx1113nXFgycOBAvvrqq3P2q3Pnzjz99NNnzdu9ezcNGjRIny5btiy7d+8+573Tpk3jlltuoUiRIlxxxRWsXLnyrOXbt29n69attGzZMn1eTEwMzz33XE6HO9cKbFLYsWMHM2fO5JlnnqFKlSrs2LGDokWLBjosk1/l4ow+L504cYLo6Gi2bdtG3bp1ad26NeA0G2d1lUpurl5ZsmQJc+bMSZ8uXrx4ju+55557CAsLA+C+++5j2LBh9OjRgzlz5nDfffelr3f9+vXp7zly5AhHjx496zu6d+9eSpcunT6dnJzMgw8+yObNmxERUlJS0pe1bt2aEiVKALB48WIWL17M9ddfDzi1nc2bN9O0aVMmTpzIvHnzAGeQy82b
N5+TFMaPH+/dweF/T1z0lNnxHT9+PAsXLqR+/fqMGTOGQYMGMW3atPTlc+bM4e67704/bgBXXXUVe/bs8ToWb/m0o1lE2onIRhFJEpGnM1leSEQ+cJd/JyIRvowHnKuKJk+eTM2aNRkxYkT6AHaWEEx+VKRIERITE9m+fTt//fUXcXFxANSsWZOEhISzym7ZsoXLL7+cokWLUrNmTX744Ycc159VcvGcl/GO7ssuuyz9dcOGDUlKSmL//v3885//5M477wSc7+mKFStITEwkMTGR3bt3n/MdLVKkyFnrfv7552nRogVr167l008/PWuZ5zZVlSFDhqSvOykpidjYWJYuXcqSJUtYsWIFP/30E9dff32md6N7dv56/hs5cuQ5ZcuWLcvOnTvTp3ft2nVO09D+/fv56aefqF+/PuAkym+//fasMnPmzKFLly5nzTt58iRFihQ5Z5sXymdJQUTCgDigPRAFdBGRqAzFYoFDqloFGA+M8lU8ABuPH09vM23YsCHr1q2zAexMgVCsWDEmTpzI2LFjSUlJoWvXrnzzzTcsWbIEcGoUjz76KE8++SQATzzxBCNGjGDTpk2A8yP96quvnrPeNm3aMGnSpPTptOajq6++mg0bNqQ3D2VFRLjjjjsYNGgQkZGR6WflGdebmEl/TGRkZPpVOuDUFK699loAZsyYkeU227Zty/Tp09P7PHbv3s2+fftITk6mePHiXHrppfzyyy/nNOGkGT9+fHpC8fyXsekI4Pbbb2fOnDmcOnWKrVu3snnzZurVq3dWmeLFi5OcnJx+rD///HMiIyPTl2/cuJFDhw6d0w+xadOms5rP8oovawr1gCRV3aKqfwFzgI4ZynQE3nFfzwVaiY/u9jmtSts1a/j55595++23WbRoEREREb7YlDFB6frrr6dOnTrMmTOHIkWKMH/+fIYPH0716tW57rrruPHGG+nfvz8AtWvXZsKECXTp0oXIyEhq1arF3r17z1nnc889x6FDh6hVqxZ16tRJb2sfOXIkt912Gy1btuSaa67JNq777ruPWbNmpTcdAUycOJGEhARq165NVFQUU6ZMOed9NWrUIDk5maNHjwLw5JNPMmTIEBo3bkxqamqW22vTpg33338/DRs25LrrruPuu+/m6NGjtGvXjtOnT1O7dm2ef/75s/oCzlfNmjW59957iYqKol27dsTFxaU3Ad1yyy3s2bOH8PBw3nzzTe666y7q1KnDzJkzGTNmTPo6Zs+eTefOnc+pkX311VfceuutFxxjRpJZm1eerFjkbqCdqvZ0p7sB9VW1v0eZtW6ZXe70r26ZAxnW1RvoDVC+fPm627dvz31AAwbwze7dVJ44MccPqTF5YcOGDWed8Zm8N378eIoWLRr09yr4QtOmTZk/f36m/TiZffZE5AdVjclpvb6sKWR2xp8xA3lTBlWNV9UYVY3x7FjKlQkTaPLRR5YQjMlH+vbtS6FChQIdht/t37+fQYMGedWxn1u+TAq7gHIe02WBjF3l6WVEJBwoBvzhw5iMMflI4cKFC+QAlaVLl6ZTp04+Wbcvk8L3QFURqSgilwCdgQUZyiwAHnRf3w18qb5qzzImAOzjbPztQj9zPksKqnoa6A8sAjYAH6rqOhEZJiK3u8XeAkqKSBIwCDi3+96YEFW4cGEOHjxoicH4jbrPUyhcuPB5r8NnHc2+EhMToxmvrzYmGNmT10wgZPXkNW87mgvsHc3G+NrFF1983k+/MiZQCs7Q2cYYY3JkScEYY0w6SwrGGGPShVxHs4jsB87jlmYASgEHciyVv9g+Fwy2zwXDhexzBVXN8e7fkEsKF0JEErzpfc9PbJ8LBtvngsEf+2zNR8YYY9JZUjDGGJOuoCWF+EAHEAC2zwWD7XPB4PN9LlB9CsYYY7JX0GoKxhhjsmFJwRhjTLp8mRREpJ2IbBSRJBE5Z+RVESkkIh+4y78TkQj/R5m3vNjnQSKyXkTWiMgXIlIhEHHmpZz22aPc
3SKiIhLyly96s88icq/7t14nIu/7O8a85sVnu7yIfCUiq93P9y2BiDOviMh0EdnnPpkys+UiIhPd47FGRG7I0wBUNV/9A8KAX4FKwCXAT0BUhjL9gCnu687AB4GO2w/73AK41H3dtyDss1uuKLAMWAnEBDpuP/ydqwKrgeLu9FWBjtsP+xwP9HVfRwHbAh33Be5zU+AGYG0Wy28B/o3z5MoGwHd5uf38WFOoBySp6hZV/QuYA3TMUKYj8I77ei7QSjI+FTu05LjPqvqVqh53J1fiPAkvlHnzdwZ4GRgN5Ifxq73Z515AnKoeAlDVfX6OMa95s88KXOG+Lsa5T3gMKaq6jOyfQNkReFcdK4ErRSTPnjOcH5PCtcBOj+ld7rxMy6jzMKBkoKRfovMNb/bZUyzOmUYoy3GfReR6oJyqfubPwHzIm79zNaCaiCwXkZUi0s5v0fmGN/v8EvB3EdkFLAQe8U9oAZPb73uu5MfnKWR2xp/xultvyoQSr/dHRP4OxADNfBqR72W7zyJyETAe6O6vgPzAm79zOE4TUnOc2uB/RaSWqh72cWy+4s0+dwFmqOo4EWkIzHT3+YzvwwsIn/5+5ceawi6gnMd0Wc6tTqaXEZFwnCpndtW1YOfNPiMiNwPPArer6ik/xeYrOe1zUaAWsFREtuG0vS4I8c5mbz/b81U1RVW3AhtxkkSo8mafY4EPAVR1BVAYZ+C4/Mqr7/v5yo9J4XugqohUFJFLcDqSF2QoswB40H19N/Cluj04ISrHfXabUqbiJIRQb2eGHPZZVZNVtZSqRqhqBE4/yu2qGsrPcvXms/1PnIsKEJFSOM1JW/waZd7yZp93AK0ARCQSJyns92uU/rUAeMC9CqkBkKyqe/Nq5fmu+UhVT4tIf2ARzpUL01V1nYgMAxJUdQHwFk4VMwmnhtA5cBFfOC/3eQxwOfCR26e+Q1VvD1jQF8jLfc5XvNznRUAbEVkPpAJPqOrBwEV9Ybzc58eBN0VkIE4zSvdQPskTkdk4zX+l3H6SF4GLAVR1Ck6/yS1AEnAc6JGn2w/hY2eMMSaP5cfmI2OMMefJkoIxxph0lhSMMcaks6RgjDEmnSUFY4wx6SwpmKAjIqkikujxLyKbshFZjSaZy20udUfi/MkdIqL6eayjj4g84L7uLiJlPJZNE5GoPI7zexGJ9uI9A0Tk0gvdtikYLCmYYHRCVaM9/m3z03a7qmodnMESx+T2zao6RVXfdSe7A2U8lvVU1fV5EuX/4pyMd3EOACwpGK9YUjAhwa0R/FdEfnT/NcqkTE0RWeXWLtaISFV3/t895k8VkbAcNrcMqOK+t5U7Tv/P7jj3hdz5I+V/z6cY6857SUQGi8jdOONLvedus4h7hh8jIn1FZLRHzN1F5PXzjHMFHgOhicgbIpIgznMUhrrzHsVJTl+JyFfuvDYissI9jh+JyOU5bMcUIJYUTDAq4tF0NM+dtw9orao3APcBEzN5Xx/gNVWNxvlR3uUOe3Af0Nidnwp0zWH7HYCfRaQwMAO4T1WvwxkBoK+IlADuAGqqam1guOebVXUukIBzRh+tqic8Fs8F7vSYvg/44DzjbIczrEWaZ1U1BqgNNBOR2qo6EWdcnBaq2sId+uI54Gb3WCYAg3LYjilA8t0wFyZfOOH+MHq6GJjktqGn4ozpk9EK4FkRKQt8oqqbRaQVUBf43h3eowhOgsnMeyJyAtiGM/xydWCrqm5yl78DPAxMwnk+wzQR+Rfg9dDcqrpfRLa4Y9Zsdrex3F1vbuK8DGfYB8+nbt0rIr1xvtfX4DxwZk2G9zZw5y93t3MJznEzBrCkYELHQOB3oA5ODfech+ao6vsi8h1wK7BIRHriDDP8jqoO8WIbXT0HzBORTJ+x4Y7HUw9nELbOQH+gZS725QPgXuAXYJ6qqji/0F7HifMEspFAHHCniFQEBgM3quohEZmBMzBcRgJ8rqpdchGvKUCs+ciEimLAXneM/G44Z8lnEZFK
wBa3yWQBTjPKF8DdInKVW6aEeP986l+ACBGp4k53A7522+CLqepCnE7czK4AOoozfHdmPgE64TwH4AN3Xq7iVNUUnGagBm7T0xXAn0CyiFwNtM8ilpVA47R9EpFLRSSzWpcpoCwpmFAxGXhQRFbiNB39mUmZ+4C1IpII1MB5ZOF6nB/PxSKyBvgcp2klR6p6EmcEyo9E5GfgDDAF5wf2M3d9X+PUYjKaAUxJ62jOsN5DwHqggqqucuflOk63r2IcMFhVf8J5NvM6YDpOk1SaeODfIvKVqu7HuTJqtrudlTjHyhjARkk1xhjjwWoKxhhj0llSMMYYk86SgjHGmHSWFIwxxqSzpGCMMSadJQVjjDHpLCkYY4xJ9//o3Th1AbKzHQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "fig, ax = plt.subplots()\n",
    "# ROC curve; the legend entry reports `area` formatted to two decimals\n",
    "ax.plot(FPR, Recall, color='red', label='ROC curve (area = %0.2f)' % area)\n",
    "# Dashed reference diagonal from (0, 0) to (1, 1)\n",
    "ax.plot([0, 1], [0, 1], color='black', linestyle='--')\n",
    "# Axis limits extend slightly beyond [0, 1] on both axes\n",
    "ax.set(\n",
    "    xlim=(-0.05, 1.05),\n",
    "    ylim=(-0.05, 1.05),\n",
    "    xlabel='False Positive Rate',\n",
    "    ylabel='Recall',\n",
    "    title='Receiver operating characteristic example',\n",
    ")\n",
    "ax.legend(loc=\"lower right\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index of the ROC point that maximises Recall - FPR (first one on ties).\n",
    "# np.argmax scans the difference once instead of building a list and searching it.\n",
    "maxindex = int(np.argmax(np.asarray(Recall - FPR)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.08950517388953827"
      ]
     },
     "execution_count": 160,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "thresholds[maxindex]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import accuracy_score as AC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Linear-kernel SVC with balanced class weights, fitted on the training split.\n",
    "# NOTE(review): C is hard-coded; presumably taken from an earlier search - confirm.\n",
    "clf = SVC(\n",
    "    kernel=\"linear\",\n",
    "    C=3.1663157894736838,\n",
    "    cache_size=5000,\n",
    "    class_weight=\"balanced\",\n",
    ").fit(Xtrain, Ytrain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column 0 holds the raw decision_function outputs for Xtest (not probabilities)\n",
    "prob = pd.DataFrame(data=clf.decision_function(Xtest))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>y_pred</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2.189193</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.373116</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.015488</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1.136262</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-0.240851</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0  y_pred\n",
       "0  2.189193     1.0\n",
       "1  0.373116     1.0\n",
       "2 -0.015488     1.0\n",
       "3 -1.136262     0.0\n",
       "4 -0.240851     0.0"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prob.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label a sample 1.0 when its decision value reaches the chosen ROC threshold, else 0.0.\n",
    "# One vectorised comparison assigns every row, so no row can be left unlabelled.\n",
    "prob[\"y_pred\"] = (prob.iloc[:, 0] >= thresholds[maxindex]).astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 167,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prob.loc[:,\"y_pred\"].isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing accuracy 0.789333,recall is 0.804665\n",
      "00:00:005985\n"
     ]
    }
   ],
   "source": [
    "# Accuracy and recall of the thresholded predictions on the test split.\n",
    "# The timer covers only the metric computation in this cell.\n",
    "times = time()\n",
    "score = AC(Ytest, prob.loc[:, \"y_pred\"].values)\n",
    "recall = recall_score(Ytest, prob.loc[:, \"y_pred\"])\n",
    "print(\"testing accuracy %f,recall is %f\" % (score, recall))\n",
    "# Format the elapsed time as MM:SS:microseconds from a timedelta. Passing the\n",
    "# duration to datetime.fromtimestamp() would treat it as a local-time epoch\n",
    "# timestamp, so the minutes field is wrong in UTC offsets that are not whole hours.\n",
    "elapsed = datetime.timedelta(seconds=time() - times)\n",
    "print(\"%02d:%02d:%06d\" % (elapsed.seconds // 60 % 60, elapsed.seconds % 60, elapsed.microseconds))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
